This patch adds support for in-place execution of the multiout layer.
Corresponding unittests are also added.
Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
return lnode->getType() == FlattenLayer::type;
};
+ /** layers which behave as a no-op but share memory among parallel nodes -
+ * multiout */
+ auto no_op_shared = [](const std::shared_ptr<LayerNode> &lnode) {
+ return lnode->getType() == MultiOutLayer::type;
+ };
+
/**
* layers whose backwarding is not dependent on input/output but only its
* derivatives and weights, if any - batch normalization
* 2. if the layer is not supporting backwarding, there is no dependency
* requirement with other nodes for backwarding.
*/
- if (no_op(lnode) || !lnode->supportBackwarding())
+ if (no_op(lnode) || no_op_shared(lnode) || !lnode->supportBackwarding())
return true;
/**
* - if any of the input layer is already operating in-place (where it
* modifies its input in-place), then this layer cannot operate in-place.
*
- * @todo @note This logic is prone to change as more layers are allowed to
+ * @note This logic is prone to change as more layers are allowed to
* work in-place such as multi-out layer, concat layer, split layer, addition
* layer, dropout layer, etc.
+ *
+ * @todo This logic sets layers to in-place one-by-one as they arrive.
+ * However, setting some layers to in-place can save more memory than others
+ * (like the multiout layer vs the activation layer). The layers need to be
+ * sorted based on the memory savings they provide and then made in-place in
+ * that order.
*/
if (lnode->getType() == ActivationLayer::type ||
lnode->getType() == BatchNormalizationLayer::type) {
}
}
+/**
+ * @brief Set the in-place shared memory configuration based on the layer type
+ *
+ * @param lnode layer node object
+ * @param shared_var if the variable should be shared
+ * @param shared_grad if the gradient should be shared
+ */
+static void
+setInplaceSharedMemoryConfigByLayer(const std::shared_ptr<LayerNode> &lnode,
+ bool &shared_var, bool &shared_grad) {
+ if (lnode->getType() == MultiOutLayer::type) {
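+ /** the multiout layer only fans the same data out to its outputs, so the
+ * output variables can share memory with the input; each output's gradient
+ * comes from a different consumer and must stay separate */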
+ shared_var = true;
+ shared_grad = false;
+ } else {
+ shared_var = true;
+ shared_grad = true;
+ }
+}
+
std::vector<Var_Grad *>
NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
const std::vector<Var_Grad *> &prev_inputs) {
/** In-Place optimizations */
std::vector<std::string> inputs_name;
+ bool shared_var = false, shared_grad = false;
if (lnode->executeInPlace()) {
std::transform(inputs.begin(), inputs.end(),
std::back_inserter(inputs_name),
[](const Var_Grad *val) { return val->getName(); });
+ setInplaceSharedMemoryConfigByLayer(lnode, shared_var, shared_grad);
}
/**
* allocated input. This is necessary for the manager to know when this
* output node is going to be used with in-place optimizations.
*/
- const std::vector<Var_Grad *> &outputs = tensor_manager->requestOutputs(
- gnode, init_context.getOutputDimensions(), inputs_name);
+ const std::vector<Var_Grad *> &outputs =
+ tensor_manager->requestOutputs(gnode, init_context.getOutputDimensions(),
+ inputs_name, shared_var, shared_grad);
/** create shared weight names if requested */
std::vector<std::string> shared_weight_names;
}
void MultiOutLayer::forwarding(RunLayerContext &context, bool training) {
- const Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
- for (unsigned int idx = 0; idx < context.getNumOutputs(); ++idx) {
- context.getOutput(idx).fill(input_);
+ if (!context.executeInPlace()) {
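+ /** when not running in-place, copy the input into every output; with
+ * in-place execution the outputs already share the input's memory and
+ * forwarding becomes a no-op */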
+ const Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
+ for (unsigned int idx = 0; idx < context.getNumOutputs(); ++idx) {
+ context.getOutput(idx).fill(input_);
+ }
}
}
* @author Jijoong Moon <jijoong.moon@samsung.com>
* @bug No known bugs except for NYI items
* @brief This is Multi Output Layer Class for Neural Network
- *
- * @todo Support inplace for this layer
*/
#ifndef __MULTIOUT_LAYER_H__
bool supportBackwarding() const override { return true; };
/**
+ * @copydoc Layer::supportInPlace()
+ */
+ bool supportInPlace() const override { return true; }
+
+ /**
* @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
*/
void exportTo(Exporter &exporter,
#include <bn_layer.h>
#include <layer_node.h>
#include <manager.h>
+#include <multiout_layer.h>
#include <nntrainer_log.h>
#include <optimized_v1_planner.h>
#include <util_func.h>
node.getExecutionOrder();
std::vector<unsigned int> var_exec_order(
{forwarding_order, calcGradient_order});
+
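+ /** the multiout layer only uses its input during forwarding; it is not
+ * needed again for gradient calculation */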
+ if (node.getType() == MultiOutLayer::type)
+ var_exec_order = {forwarding_order};
+
std::vector<unsigned int> grad_exec_order({calcDerivative_order});
/** batch normalization layer uses input in forwarding only */
std::vector<Var_Grad *>
Manager::requestOutputs(const GraphNode &node,
const std::vector<TensorDim> &outputs_dim,
- const std::vector<std::string> &inputs_name) {
+ const std::vector<std::string> &inputs_name,
+ bool shared_var, bool shared_grad) {
const auto [forwarding_order, calcGradient_order, calcDerivative_order] =
node.getExecutionOrder();
std::vector<unsigned int> var_exec_order({forwarding_order});
std::vector<Var_Grad *> ret;
size_t current_size = outputs_v2.size();
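+ /** tensors can only be shared when input names to share with are given */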
+ shared_var = shared_var && !inputs_name.empty();
+ shared_grad = shared_grad && !inputs_name.empty();
for (unsigned int idx = 0; idx < outputs_dim.size(); idx++) {
auto const &dim = outputs_dim[idx];
Tensor *var = nullptr, *grad = nullptr;
const std::string &var_name =
node.getName() + std::string(":output") + std::to_string(idx);
- if (!inputs_name.empty()) {
+ /** a single input name is shared by all the outputs, else map one-to-one */
+ std::string shared_name = "";
+ if (inputs_name.size() == 1)
+ shared_name = inputs_name[0];
+ else if (!inputs_name.empty())
+ shared_name = inputs_name[idx];
+
+ if (shared_var) {
+ /** request shared tensor for variable */
var = tensor_pool.requestPrerequestedTensor(
dim, /// tensor dim
var_exec_order, var_ls, var_name,
- inputs_name[idx], /// name
+ shared_name, /// name
Tensor::Initializer::NONE /// tensor initializer
);
-
- /** skip requesting tensor for label */
- if (!node.getOutputConnections().empty()) {
- grad = tensor_pool.requestPrerequestedTensor(
- dim, /// tensor dim
- grad_exec_order, grad_ls,
- var_name + Var_Grad::grad_suffix, /// name
- inputs_name[idx] + Var_Grad::grad_suffix, /// shared name
- Tensor::Initializer::ZEROS /// tensor initializer
- );
- } else {
- /** requesting externally allocated tensor for label */
- grad = tensor_pool.requestExternallyAllocateTensor(
- dim, /// tensor dim
- var_name + Var_Grad::grad_suffix, /// name
- Tensor::Initializer::ZEROS /// tensor initializer
- );
- }
} else {
+ /** request new tensor for variable */
var = tensor_pool.requestTensor(
dim, /// tensor dim
var_exec_order, var_ls,
var_name, /// name
Tensor::Initializer::NONE /// tensor initializer
);
+ }
+ if (shared_grad) {
+ /** request shared tensor for gradient */
+ grad = tensor_pool.requestPrerequestedTensor(
+ dim, /// tensor dim
+ grad_exec_order, grad_ls,
+ var_name + Var_Grad::grad_suffix, /// name
+ shared_name + Var_Grad::grad_suffix, /// shared name
+ Tensor::Initializer::ZEROS /// tensor initializer
+ );
+ } else {
+ /** request new tensor for gradient */
if (!node.getOutputConnections().empty()) {
+ /** request a simple tensor */
grad = tensor_pool.requestTensor(
dim, /// tensor dim
grad_exec_order, grad_ls,
std::vector<Var_Grad *>
requestOutputs(const GraphNode &node,
const std::vector<TensorDim> &outputs_dim,
- const std::vector<std::string> &inputs_name = {});
+ const std::vector<std::string> &inputs_name = {},
+ bool shared_var = true, bool shared_grad = true);
/**
* @brief Get all the weights
)
multi_gru_layer_tc(1,2)(file_name="multi_gru_return_sequence.info")
multi_gru_layer_tc(2,2)(file_name="multi_gru_return_sequence_with_batch.info")
+
+ def multiout_test():
+ # x -> [a, b] -> c
+ x = K.Input(shape=(1, 10), name="x")
+ fc = K.layers.Dense(2, name="fc")(x)
+ b0, a0 = MultiOutLayer(num_output=2)(fc)
+ fc1 = K.layers.Dense(2, name="fc1")(a0)
+ fc2 = K.layers.Dense(2, name="fc2")(b0)
+ add1 = K.layers.Add(name="add_1")([fc1, fc2]) # [a, b] -> c
+ fc3 = K.layers.Dense(3, name="fc3")(add1)
+ sm = K.layers.Activation("softmax", name="sm")(fc3)
+
+ return x, [x, fc, b0, a0, fc1, fc2, add1, fc3, sm]
+
+ x, y = multiout_test()
+ record(
+ loss_fn_str="mse",
+ file_name="multiout_model.info",
+ input_shape=(3, 10),
+ label_shape=(3, 3),
+ optimizer=opt.SGD(learning_rate=0.1),
+ iteration=10,
+ inputs=x,
+ outputs=y,
+ # debug=["name", "summary", "output", "initial_weights"],
+ )
}
);
+INI multiout_model(
+ "multiout_model",
+ {
+ nn_base + "loss=mse | batch_size=3",
+ sgd_base + "learning_rate = 0.1",
+ I("x") + input_base + "input_shape = 1:10",
+ I("fc") + fc_base + "unit = 2",
+ I("fc1") + fc_base
+ + "unit=2 | input_layers=fc",
+ I("fc2") + fc_base
+ + "unit=2 | input_layers=fc",
+ I("add1", "type=addition | input_layers=fc1, fc2"),
+ I("fc3") + fc_base + "unit=3",
+ I("sm") + softmax_base
+ }
+);
+
/**
* @brief helper function to make model testcase
*
/**< Addition test */
mkModelIniTc(addition_resnet_like, "3:1:1:10", 10, ModelTestOption::COMPARE), // Todo: Enable option to ALL
+ /**< Multiout test */
+ mkModelIniTc(multiout_model, "3:1:1:3", 10, ModelTestOption::COMPARE), // Todo: Enable option to ALL
+
/// #1192 time distribution inference bug
mkModelIniTc(fc_softmax_mse_distribute, "3:1:5:3", 1, ModelTestOption::NO_THROW_RUN),
mkModelIniTc(fc_softmax_cross_distribute, "3:1:5:3", 1, ModelTestOption::NO_THROW_RUN),