while (dfs_stack.empty() == false) {
Sorted.push_back(dfs_stack.top());
- Sorted.back()->setExecLoc(
- {Sorted.size(), (node_list.size() * 2) - Sorted.size() + 1});
dfs_stack.pop();
}
*/
class GraphNode {
public:
+ /**
+ * @brief Provides the time/order at which the node will be executed.
+ * @details This time will be finalized once the graph has been calculated.
+ * The three times given indicate the order in which the following three
+ * operations for each node are executed:
+ * 1. Forwarding
+ * 2. calcGradient
+ * 3. calcDerivative
+ * One constraint on the three times is that they must be in ascending
+ * order. This ensures that the operations are executed in the order of their
+ * listing.
+ */
+ typedef std::tuple<unsigned int, unsigned int, unsigned int> ExecutionOrder;
+
/**
* @brief Destructor of Layer Class
*/
- * @details The two values represents the value for forward and backward
- * respectively
+ * @details The three values represent the execution order for forwarding,
+ * calcGradient, and calcDerivative respectively
*/
- virtual std::pair<unsigned int, unsigned int> getExecLoc() const = 0;
+ virtual ExecutionOrder getExecutionOrder() const = 0;
/**
* @brief set the execution order/location of this node
*
- * @param exec_loc the execution order/location of this node
+ * @param exec_order the execution order/location of this node
- * @details The two values represents the value for forward and backward
- * respectively
+ * @details The three values represent the execution order for forwarding,
+ * calcGradient, and calcDerivative respectively
*/
- virtual void setExecLoc(std::pair<unsigned int, unsigned int> exec_loc) = 0;
+ virtual void setExecutionOrder(ExecutionOrder exec_order_) = 0;
};
/**
graph.topologicalSort();
countNonTrainableLayersAtBegin();
+ setExecutionOrder();
status = checkCompiledGraph();
NN_RETURN_STATUS();
return status;
}
+void NetworkGraph::setExecutionOrder() {
+ auto node_count = graph.size();
+ /** @todo: remove backwarding count for non-trainable layers */
+ for (auto iter = cbegin(); iter != cend(); iter++) {
+ auto &node = *iter;
+ /** position of this node in the topologically sorted order (0-based) */
+ auto order_idx = iter - cbegin();
+ /** forwarding runs in topological order */
+ auto forward_order = order_idx;
+ /** backwarding orders start at 3 * node_count + 1 for the first node and
+  * decrease by 2 per node, so calcGradient runs in exact reverse of the
+  * forwarding order and never collides with a forwarding order value */
+ auto calc_gradient_order = (node_count * 3) - (order_idx * 2) + 1;
+ /** calc derivative is called right after calc_gradient */
+ auto calc_derivative_order = calc_gradient_order + 1;
+ node->setExecutionOrder(
+ {forward_order, calc_gradient_order, calc_derivative_order});
+ }
+}
+
void NetworkGraph::updateConnectionName(const std::string &from,
const std::string &to) {
for (auto iter = cbegin(); iter != cend(); iter++) {
* match and merging loss layers with activation layers if needed.
*/
void finalizeLossLayer();
+
+ /**
+ * @brief Set the order of execution for all the nodes in the graph
+ *
+ * @details This sets the order of execution using the order from the
+ * topological sort. The order of forwarding matches the topological sort. The
+ * order for backwarding is in the exact reverse order. The calcDerivative()
+ * is expected to be called right after calcGradient().
+ */
+ void setExecutionOrder();
};
} // namespace nntrainer
class Weight;
class Var_Grad;
-/**
- * @brief define the lifespan of the given tensor to reduce peak memory
- *
- */
-enum TensorLifespan {
- FORWARD_FUNC_LIFESPAN, /**< tensor must not be reset before during the
- forward function call, eg. temporary tensors
- needed during forward operations */
- BACKWARD_FUNC_LIFESPAN, /**< tensor must not be reset before during the
- backward function call, eg. temporary tensors
- needed during backward operations */
- ITERATION_LIFESPAN, /**< tensor must not be reset until the owning layer
- finishes its execution in the current iteration,
- eg. hidden memory/cells of RNN */
- EPOCH_LIFESPAN, /**< tensor must not be reset before the epoch ends */
- MAX_LIFESPAN, /**< tensor must not be reset until the end of the model
- execution, eg. layer weights */
-};
-
/**
* @class Layer Context class for all layers
* @brief Class for Layer context
num_outputs(num_out),
name(n) {}
+ /**
+ * @brief get the name of the layer
+ *
+ * @return name of the layer
+ */
const std::string &getName() const { return name; }
/**
const Tensor::Initializer init = Tensor::Initializer::NONE,
bool trainable = false,
TensorLifespan lifespan = ITERATION_LIFESPAN) {
- tensors_spec.emplace_back(dim, init, trainable, name);
+ tensors_spec.emplace_back(dim, init, trainable, name, lifespan);
return tensors_spec.size() - 1;
}
activation_type(ActivationType::ACT_NONE),
layer_node_props(new PropsType(props::Name(), props::Flatten(),
props::Distribute(), props::Trainable(),
- props::Loss())), regularization_loss(0.0f), exec_loc({0, 0}) {
+ props::Loss())),
+ regularization_loss(0.0f),
+ exec_order({0, 0, 0}) {
if (layer && layer->getType() == TimeDistLayer::type) {
std::get<props::Distribute>(*layer_node_props).set(true);
}
*
* @retval the execution order/location of this node
*/
- std::pair<unsigned int, unsigned int> getExecLoc() const { return exec_loc; }
+ ExecutionOrder getExecutionOrder() const { return exec_order; }
/**
* @brief set the execution order/location of this node
*
- * @param exec_loc the execution order/location of this node
+ * @param exec_order the execution order/location of this node
*/
- virtual void setExecLoc(std::pair<unsigned int, unsigned int> exec_loc_) {
- exec_loc = exec_loc_;
+ void setExecutionOrder(ExecutionOrder exec_order_) {
+ exec_order = exec_order_;
}
/**
*/
std::unique_ptr<PropsType> layer_node_props; /**< properties for the node */
float regularization_loss;
- std::pair<int, int> exec_loc; /**< order/location of execution for this node
- in forward and backward */
+ ExecutionOrder exec_order; /**< order/location of execution for this node
+ in forward and backwarding operations */
/**
* @brief setProperty by PropertyType
/**
* @brief free layers
*/
-NeuralNetwork::~NeuralNetwork() {
- model_graph.reset();
-
- std::for_each(data_buffers.begin(), data_buffers.end(), [](auto &buffers) {
- if (buffers) {
- buffers->clear();
- }
- });
-}
+/**
+ * NOTE(review): the previous destructor also cleared each entry of
+ * data_buffers; that clearing is dropped here — confirm the buffers release
+ * their resources in their own destructors.
+ */
+NeuralNetwork::~NeuralNetwork() { model_graph.reset(); }
void NeuralNetwork::setLabels(sharedConstTensors label) {
auto fill_label = [&label](auto const &layer_node) {
[&count, &node](auto const &elem) {
return std::make_tuple(elem, Tensor::Initializer::NONE, true,
node.getName() + std::string(":input") +
- std::to_string(count++));
+ std::to_string(count++),
+ ITERATION_LIFESPAN);
});
return requestTensors<Var_Grad>(node, inputs_spec, inputs_v2);
}
[&count, &node](auto const &elem) {
return std::make_tuple(elem, Tensor::Initializer::NONE, true,
node.getName() + std::string(":output") +
- std::to_string(count++));
+ std::to_string(count++),
+ ITERATION_LIFESPAN);
});
return requestTensors<Var_Grad>(node, outputs_spec, outputs_v2);
}
std::vector<std::vector<std::unique_ptr<Var_Grad>>>
tensors_v2; /**< extra tensors required by the layers */
- std::unordered_map<std::string, std::pair<unsigned int, unsigned int>>
- tensor_exec_loc; /**< stores the order/location at which a given tensor is
+ std::unordered_map<std::string, GraphNode::ExecutionOrder>
+ tensor_exec_order; /**< stores the order/location at which a given tensor is
going to be executed when the network is forwarded and
- abackwarded */
+ backwarded */
/**< Weights of all the layer in the model to be managed */
std::vector<std::vector<std::reference_wrapper<Weight>>> weights;
* @todo maybe requesting tensor with same name should mean reusing the
* tensor than giving the error
*/
- if (tensor_exec_loc.find(ts_name) != tensor_exec_loc.end())
+ if (tensor_exec_order.find(ts_name) != tensor_exec_order.end())
throw std::invalid_argument("Requesting tensor " + ts_name +
" with same name");
/**
- * @todo set the exec_loc based on the set lifespan */
- tensor_exec_loc[ts_name] = node.getExecLoc();
+ * @todo set the exec_order based on the set lifespan from the spec
+ */
+ tensor_exec_order[ts_name] = node.getExecutionOrder();
}
std::transform(tensors_list.begin(), tensors_list.end(),
UNKNOWN /**< Unknown */
};
+/**
+ * @brief define the lifespan of the given tensor to reduce peak memory
+ *
+ * @note enumerators appear ordered from the shortest to the longest
+ * lifespan — (review) confirm before relying on ordinal comparison of the
+ * values.
+ */
+enum TensorLifespan {
+ FORWARD_FUNC_LIFESPAN, /**< tensor must not be reset during the
+ forward function call, eg. temporary tensors
+ needed during forward operations */
+ BACKWARD_FUNC_LIFESPAN, /**< tensor must not be reset during the
+ backward function call, eg. temporary tensors
+ needed during backward operations */
+ ITERATION_LIFESPAN, /**< tensor must not be reset until the owning layer
+ finishes its execution in the current iteration,
+ eg. hidden memory/cells of RNN */
+ EPOCH_LIFESPAN, /**< tensor must not be reset before the epoch ends */
+ MAX_LIFESPAN, /**< tensor must not be reset until the end of the model
+ execution, eg. layer weights */
+};
+
/**
* @brief Specification of the Weight as a tensor wrapper
*
* @brief Specification of the Var_Grad (trainable tensor) as a tensor wrapper
*
* @details The tuple values are dimension, initializer, need_gradient property,
- * and the name of the tensor object.
+ * the name, and lifespan of the Var_Grad object.
*/
-typedef std::tuple<TensorDim, Tensor::Initializer, bool, const std::string>
+typedef std::tuple<TensorDim, Tensor::Initializer, bool, const std::string,
+ TensorLifespan>
VarGradSpec;
} // namespace nntrainer