Enable time dist layer for the LayerV2 design.

This patch simulates the InitContext and the RunContext inside the time
dist layer so that variables with the proper shapes can be passed to
the internal layer. Further, context-changing function calls made by
the internal layer are replicated on the actual InitContext/RunContext
by detecting the changes and making those calls again.

LayerNode was updated to ensure that a layer is not wrapped for
distribution more than once.
Some more getter APIs were added for TensorDim and LayerContext.
Unit tests related to distribute have also been enabled with this patch.
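
For illustration, a minimal, self-contained sketch of the simulate-and-replicate
pattern described above follows. It is not the nntrainer API: Context, Spec,
InnerLayer and TimeDistWrapper are simplified stand-ins, and only the
request-replay portion of finalize() is shown.

    #include <iostream>
    #include <string>
    #include <vector>

    // Simplified stand-in for a layer init context: it only records tensor requests.
    struct Spec {
      std::string name;
    };

    struct Context {
      std::vector<Spec> tensor_specs;
      unsigned int requestTensor(const Spec &spec) {
        tensor_specs.push_back(spec);
        return tensor_specs.size() - 1; // index usable with a later getter
      }
    };

    // Stand-in for the wrapped layer: it only ever sees the per-timestep view.
    struct InnerLayer {
      void finalize(Context &ctx) { ctx.requestTensor({"inner:hidden"}); }
    };

    // Stand-in for the time-dist wrapper: simulate a context for the inner
    // layer, then replay every request it made onto the real context.
    struct TimeDistWrapper {
      InnerLayer inner;

      void finalize(Context &real_ctx) {
        Context sim_ctx;         // simulated context (time axis collapsed)
        inner.finalize(sim_ctx); // the inner layer makes its requests here

        for (const Spec &spec : sim_ctx.tensor_specs)
          real_ctx.requestTensor(spec); // replicate the detected requests
      }
    };

    int main() {
      Context real_ctx;
      TimeDistWrapper wrapper;
      wrapper.finalize(real_ctx);

      for (const Spec &spec : real_ctx.tensor_specs)
        std::cout << "requested: " << spec.name << '\n';
      return 0;
    }
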
Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
return ML_ERROR_NONE;
}
- if (in_node->getType() == ActivationLayer::type) {
- ml_loge("It is not allowed to realize ativation layer, possibly layer is "
- "added right after activation");
+ if (act == ActivationType::ACT_UNKNOWN) {
+ ml_loge("cannot realize unknown activation type");
return ML_ERROR_INVALID_PARAMETER;
}
- if (act == ActivationType::ACT_UNKNOWN) {
- ml_loge("cannot realize unknown activation type");
+ if (in_node->getType() == ActivationLayer::type) {
+ ml_loge("It is not allowed to realize activation layer, possibly layer is "
+ "added right after activation");
return ML_ERROR_INVALID_PARAMETER;
}
return weights_spec.size() - 1;
}
+ /**
+ * @brief Request a new weight for the layer
+ *
+ * @param spec tensor spec
+ * @return unsigned int index of the weight for its getter
+ *
+ * @todo Consider providing a guarantee that the returned indices will always
+ * start from 0 and will always be incremental.
+ */
+ unsigned int requestWeight(const Weight::Spec &spec) {
+ weights_spec.emplace_back(spec);
+ return weights_spec.size() - 1;
+ }
+
/**
* @brief Request a new tensor for the layer
*
*/
typedef Var_Grad::Spec TensorSpec;
+ /**
+ * @brief Request a new tensor for the layer
+ *
+ * @param spec tensor spec
+ * @return unsigned int index of the tensor for its getter
+ *
+ * @todo Consider providing a guarantee that the returned indices will always
+ * start from 0 and will always be incremental.
+ */
+ unsigned int requestTensor(const TensorSpec &spec) {
+ tensors_spec.emplace_back(spec);
+ return tensors_spec.size() - 1;
+ }
+
/**
* @brief Get the current weights spec
*
*/
const std::vector<TensorSpec> &getTensorsSpec() const { return tensors_spec; }
+ /**
+ * @brief Get the number of requested tensor objects
+ *
+ * @return unsigned int number of requested tensors
+ */
+ unsigned int getNumTensors() const { return tensors_spec.size(); }
+
/**
* @brief Set the batch for the init context
*
return tensors[idx]->getGradientRef();
}
+ /**
+ * @brief check if the tensor has gradient
+ *
+ * @param idx Identifier of the tensor
+ * @return true if tensor has gradient, else false
+ */
+ bool tensorHasGradient(unsigned int idx) const {
+ return tensors[idx]->hasGradient();
+ }
+
+ /**
+ * @brief Get the tensor name
+ *
+ * @param idx Identifier of the tensor
+ * @return name of the tensor
+ */
+ const std::string &getTensorName(unsigned int idx) const {
+ return tensors[idx]->getName();
+ }
+
/**
* @brief Get the number of Outputs tensor objects
*
*/
unsigned int getNumWeights() const { return weights.size(); }
+ /**
+ * @brief Get the number of requested tensor objects
+ *
+ * @return unsigned int number of requested tensors
+ */
+ unsigned int getNumTensors() const { return tensors.size(); }
+
/**
* @brief Set the batch for the run context
*
int LayerNode::setProperty(std::vector<std::string> properties) {
int status = ML_ERROR_NONE;
+ bool already_distributed =
+ !std::get<props::Distribute>(*layer_node_props).empty() &&
+ std::get<props::Distribute>(*layer_node_props).get();
auto left_properties = loadProperties(properties, *layer_node_props);
/// note that setting distribute is only allowed for one time.
/// until we have layerNode::finalize and must not except timedist layer
- if (getDistribute()) {
- // auto &ac = nntrainer::AppContext::Global();
- // std::unique_ptr<nntrainer::Layer> dlayer =
- // ac.createObject<nntrainer::Layer>(TimeDistLayer::type);
- // dynamic_cast<TimeDistLayer*>(dlayer.get())->setDistLayer(std::move(layer));
- // layer = std::move(dlayer);
+ if (getDistribute() && !already_distributed) {
+ auto &ac = nntrainer::AppContext::Global();
+ std::unique_ptr<nntrainer::Layer> dlayer =
+ ac.createObject<nntrainer::Layer>(TimeDistLayer::type);
+ dynamic_cast<TimeDistLayer *>(dlayer.get())->setDistLayer(std::move(layer));
+ layer = std::move(dlayer);
}
std::vector<std::string> remainder;
return out;
}
-std::string LayerNode::getDistLayerType() const {
- // if (getDistribute())
- // return
- // std::static_pointer_cast<TimeDistLayer>(layerv1)->getDistLayerType();
- // else
- throw std::runtime_error(
- "Get distribution layer type for non-distributed layer");
-}
-
ActivationType LayerNode::getActivationType() const { return activation_type; }
ActivationType LayerNode::getActivationToBeRealized() const noexcept {
}
const nntrainer::Layer *LayerNode::getLayer() const {
- // if (getDistribute())
- // return ((TimeDistLayer *)(layer.get()))->getDistLayer();
- // else
- return layer.get();
+ if (getDistribute())
+ return ((TimeDistLayer *)(layer.get()))->getDistLayer();
+ else
+ return layer.get();
}
nntrainer::Layer *LayerNode::getLayer() {
- // if (getDistribute())
- // return ((TimeDistLayer *)(layer.get()))->getDistLayer();
- // else
- return layer.get();
+ if (getDistribute())
+ return ((TimeDistLayer *)(layer.get()))->getDistLayer();
+ else
+ return layer.get();
}
void LayerNode::updateInputLayers(const std::string &from,
if (finalized) {
if (run_context.readyToUse()) {
- layer->setBatch(run_context, batch);
+ getLayer()->setBatch(run_context, batch);
} else {
/** run_context has not been created yet */
- layer->setBatch(init_context, batch);
+ getLayer()->setBatch(init_context, batch);
}
}
}
*/
ActivationType getActivationToBeRealized() const noexcept;
- /**
- * @brief get distribute for this layer
- * @retval dist to enable/disable distribute
- */
- std::string getDistLayerType() const;
-
/**
* @brief Activation Type Getter
* @retval Activation Type.
*/
#include <layer_internal.h>
-#include <lazy_tensor.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
#include <parse_util.h>
namespace nntrainer {
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
static void reshape(Tensor &m) {
TensorDim d = m.getDim();
m.reshape({d[2], d[1], d[0], d[3]});
}
-void TimeDistLayer::setPosition() {
- positions[0] = net_input[0]->getVariableRef().getData();
- positions[1] = net_input[0]->getGradientRef().getData();
- positions[2] = net_hidden[0]->getVariableRef().getData();
- positions[3] = net_hidden[0]->getGradientRef().getData();
+void TimeDistLayer::setPosition(RunLayerContext &context) {
+ positions[0] = context.getInput(SINGLE_INOUT_IDX).getData();
+ positions[2] = context.getOutput(SINGLE_INOUT_IDX).getData();
+ /** TODO: use mode of execution here */
+ try {
+ positions[1] = context.getOutgoingDerivative(SINGLE_INOUT_IDX).getData();
+ positions[3] = context.getIncomingDerivative(SINGLE_INOUT_IDX).getData();
+ } catch (...) {
+ /** in case of inference, these tensors will not exist */
+ }
}
-void TimeDistLayer::transposeInOut() {
+void TimeDistLayer::transposeInOut(RunLayerContext &context) {
// Position[0] : net_input.variable
- Tensor &input_ = net_input[0]->getVariableRef();
+ Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
input_.copy(transposeTensor(input_));
// Position[1] : net_input.gradient
- Tensor &ret_ = net_input[0]->getGradientRef();
+ Tensor &ret_ = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
if (ret_.getData() != positions[0]) {
ret_.copy(transposeTensor(ret_));
} else {
}
// Position[2] : net_hidden.variable
- Tensor &hval_ = net_hidden[0]->getVariableRef();
+ Tensor &hval_ = context.getOutput(SINGLE_INOUT_IDX);
if (hval_.getData() != positions[0] && hval_.getData() != positions[1]) {
hval_.copy(transposeTensor(hval_));
} else {
// Position[3] : net_hidden.gradient
bool trans = true;
- Tensor &derivative_ = net_hidden[0]->getGradientRef();
+ Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
for (unsigned int i = 0; i < 3; ++i) {
if (derivative_.getData() == positions[i]) {
trans = false;
return in;
}
-int TimeDistLayer::initialize(Manager &manager) {
- int status = ML_ERROR_NONE;
-
- if (getNumInputs() != 1) {
+void TimeDistLayer::finalize(InitLayerContext &context) {
+ if (context.getNumInputs() != 1) {
throw std::invalid_argument("Time distributed layer takes only one input");
}
throw std::invalid_argument("distributed layer is not set properly");
}
- if (input_dim[0].channel() != 1) {
+ const TensorDim &input_dim = context.getInputDimensions()[0];
+ if (input_dim.channel() != 1) {
throw std::invalid_argument(
"only 1 channel is allow for time distributed layer");
}
- TensorDim dist_dim = input_dim[0];
+ /**
+ * simulate an InitLayerContext, and then replicate its effect onto the
+ * actual context
+ */
+ TensorDim dist_dim = input_dim;
dist_dim.height(1);
+ InitLayerContext dist_context({dist_dim}, context.getNumOutputs());
- dist_layer->setInputDimension({dist_dim});
-
- // Set the weight of dist_layer
- // Input & Output Buffer is set by manager of model.
// During forwarding and backwarding, it sets the input and output buffers
// of dist_layer properly
// dist_layer will use forwarding_with_val and backwarding_with_val
- dist_layer->initialize(manager);
+ dist_layer->finalize(dist_context);
- output_dim[0] = dist_layer->getOutputDimension()[0];
+ TensorDim output_dim = dist_context.getOutputDimensions()[0];
+ // input_dim.height is the number of time iterations
+ output_dim.height(input_dim.height());
+ context.setOutputDimensions({output_dim});
- // input_dim[0].height is number of time iteration
- output_dim[0].height(input_dim[0].height());
+ /** real setting of context */
+ fillLayerInitContext(context, dist_context);
+}
- return status;
+void TimeDistLayer::fillWeightsFromContext(RunLayerContext &context) {
+ weights_wrapper.resize(context.getNumWeights());
+
+ /** create weights */
+ for (unsigned int idx = 0; idx < context.getNumWeights(); idx++) {
+ if (context.weightHasGradient(idx)) {
+ weights_wrapper[idx] =
+ Weight(context.getWeight(idx), context.getWeightGrad(idx),
+ context.getWeightName(idx));
+ } else {
+ weights_wrapper[idx] =
+ Weight(context.getWeight(idx), Tensor(), context.getWeightName(idx));
+ }
+ }
}
-void TimeDistLayer::forwarding(bool training) {
- setPosition();
+void TimeDistLayer::fillTensorsFromContext(RunLayerContext &context) {
+ tensors_wrapper.resize(context.getNumTensors());
+
+ /** create tensors */
+ for (unsigned int idx = 0; idx < context.getNumTensors(); idx++) {
+ if (context.tensorHasGradient(idx)) {
+ tensors_wrapper[idx] =
+ Var_Grad(context.getTensor(idx), context.getTensorGrad(idx),
+ context.getTensorName(idx));
+ } else {
+ tensors_wrapper[idx] =
+ Var_Grad(context.getTensor(idx), Tensor(), context.getTensorName(idx));
+ }
+ }
+}
+
+std::vector<Weight *> TimeDistLayer::getWeightsForContext() {
+ /** create weights for context */
+ std::vector<Weight *> weights_for_context;
+ for (auto &w : weights_wrapper)
+ weights_for_context.push_back(&w);
+
+ return weights_for_context;
+}
- Tensor &hidden_ = net_hidden[0]->getVariableRef();
- Tensor &input_ = net_input[0]->getVariableRef();
+std::vector<Var_Grad *> TimeDistLayer::getTensorsForContext() {
+ /** create tensors for context */
+ std::vector<Var_Grad *> tensors_for_context;
+ for (auto &t : tensors_wrapper)
+ tensors_for_context.push_back(&t);
+
+ return tensors_for_context;
+}
+
+void TimeDistLayer::forwarding(RunLayerContext &context, bool training) {
+ setPosition(context);
+
+ Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
+ Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
// input_.dim = [ b, 1, h, w ]
- Tensor hidden_g, h_g;
+ Tensor h_g;
- TensorDim ho_dim = hidden_.getDim();
- TensorDim in_dim = input_.getDim();
+ const TensorDim &ho_dim = hidden_.getDim();
+ const TensorDim &in_dim = input_.getDim();
// TODO: This transposed Input Tensor could be reused for backwarding
Tensor in = transposeTensor(input_);
h_dim.channel(1);
h_dim.height(1);
- if (dist_layer->getType() == "loss") {
- hidden_g = net_hidden[0]->getGradientRef();
- if (!hidden_g.uninitialized()) {
- h_g = transposeTensor(hidden_g);
- }
+ if (dist_layer->requireLabel() &&
+ context.isLabelAvailable(SINGLE_INOUT_IDX)) {
+ Tensor &hidden_g = context.getLabel(SINGLE_INOUT_IDX);
+ h_g = transposeTensor(hidden_g);
}
- /** @todo use context->getName() once context is enabled */
- Var_Grad in_var(i_dim, true, false, "dist_layer:input");
- Var_Grad out_var(h_dim, true, false, "dist_layer:output");
+ Var_Grad in_var(i_dim, false, false, context.getName() + ":input");
+ Var_Grad out_var(h_dim,
+ dist_layer->requireLabel() &&
+ context.isLabelAvailable(SINGLE_INOUT_IDX),
+ false, context.getName() + ":output");
+
+ fillWeightsFromContext(context);
+ fillTensorsFromContext(context);
for (unsigned int i = 0; i < in_dim.height(); ++i) {
//
in_var.initializeVariable(in_iter);
out_var.initializeVariable(out_iter);
- if (dist_layer->getType() == "loss") {
+ if (dist_layer->requireLabel() &&
+ context.isLabelAvailable(SINGLE_INOUT_IDX)) {
label_iter =
h_g.getSharedDataTensor(h_dim, i * ho_dim.batch() * ho_dim.width());
out_var.initializeGradient(label_iter);
}
- dist_layer->setInputBuffers({std::make_shared<Var_Grad>(in_var)});
- dist_layer->setOutputBuffers({std::make_shared<Var_Grad>(out_var)});
+ RunLayerContext dist_context(context.getName(), context.getLoss(),
+ getWeightsForContext(), {&in_var}, {&out_var},
+ getTensorsForContext());
- dist_layer->forwarding();
+ dist_layer->forwarding(dist_context, training);
}
hidden_.copy(transposeTensor(out));
+ clearFromContext();
}
-void TimeDistLayer::copy(std::shared_ptr<LayerV1> l) {
- LayerV1::copy(l);
-
- std::shared_ptr<TimeDistLayer> from =
- std::static_pointer_cast<TimeDistLayer>(l);
- this->dist_layer = from->dist_layer;
-}
-
-void TimeDistLayer::setDistLayer(std::shared_ptr<LayerV1> l) {
- dist_layer = l;
-};
-
-void TimeDistLayer::calcDerivative() {
- Tensor &derivative_ = net_hidden[0]->getGradientRef();
- Tensor &hval_ = net_hidden[0]->getVariableRef();
- Tensor &ret_ = net_input[0]->getGradientRef();
- Tensor &input_ = net_input[0]->getVariableRef();
+void TimeDistLayer::calcDerivative(RunLayerContext &context) {
+ Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
+ Tensor &hval_ = context.getOutput(SINGLE_INOUT_IDX);
+ Tensor &ret_ = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
+ Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
TensorDim der_dim = derivative_.getDim();
TensorDim ret_dim = ret_.getDim();
TensorDim r_dim = {ret_dim[2], 1, 1, ret_dim[3]};
TensorDim d_dim = {der_dim[2], 1, 1, der_dim[3]};
- /** @todo use context->getName() once context is enabled */
- Var_Grad in_var(r_dim, true, false, "dist_layer:input");
- Var_Grad out_var(d_dim, true, false, "dist_layer:output");
+ Var_Grad in_var(r_dim, true, false, context.getName() + ":input");
+ Var_Grad out_var(d_dim, true, false, context.getName() + ":output");
+
+ fillWeightsFromContext(context);
+ fillTensorsFromContext(context);
for (unsigned int i = 0; i < der_dim[0]; ++i) {
Tensor ret_iter =
out_var.initializeGradient(d_iter);
out_var.initializeVariable(hval_iter);
- dist_layer->setInputBuffers({std::make_shared<Var_Grad>(in_var)});
- dist_layer->setOutputBuffers({std::make_shared<Var_Grad>(out_var)});
+ RunLayerContext dist_context(context.getName(), context.getLoss(),
+ getWeightsForContext(), {&in_var}, {&out_var},
+ getTensorsForContext());
- dist_layer->calcDerivative();
+ dist_layer->calcDerivative(dist_context);
}
ret_.copy(transposeTensor(ret_));
hval_.reshape({der_dim[2], 1, der_dim[0], der_dim[3]});
derivative_.reshape({der_dim[2], 1, der_dim[0], der_dim[3]});
input_.reshape({ret_dim[2], 1, ret_dim[0], ret_dim[3]});
+ clearFromContext();
}
-void TimeDistLayer::calcGradient() {
+void TimeDistLayer::calcGradient(RunLayerContext &context) {
// Even if the dist_layer->getNumWeights() == 0, we do the transpose here
// for the calculation of derivatives and overwrite original tensors.
// And use them in calcDerivatives() without transpose.
- transposeInOut();
+ transposeInOut(context);
- if (dist_layer->getNumWeights() == 0)
+ if (context.getNumWeights() == 0)
return;
- Tensor &input_ = net_input[0]->getVariableRef();
- Tensor &derivative_ = net_hidden[0]->getGradientRef();
+ Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
+ Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
TensorDim der_dim = derivative_.getDim();
TensorDim in_dim = input_.getDim();
TensorDim i_dim = {in_dim[2], 1, 1, in_dim[3]};
TensorDim d_dim = {der_dim[2], 1, 1, der_dim[3]};
+ fillWeightsFromContext(context);
+ fillTensorsFromContext(context);
+
for (unsigned int i = 0; i < der_dim[0]; ++i) {
Tensor in_iter =
input_.getSharedDataTensor(i_dim, i * i_dim.batch() * i_dim.width());
Tensor d_iter =
derivative_.getSharedDataTensor(d_dim, i * d_dim.batch() * d_dim.width());
- /** @todo use context->getName() once context is enabled */
- Var_Grad in_var(i_dim, true, false, "dist_layer:input");
- Var_Grad out_var(d_dim, true, false, "dist_layer:output");
+ Var_Grad in_var(i_dim, true, false, context.getName() + ":input");
+ Var_Grad out_var(d_dim, true, false, context.getName() + ":output");
in_var.initializeVariable(in_iter);
out_var.initializeGradient(d_iter);
- dist_layer->setInputBuffers({std::make_shared<Var_Grad>(in_var)});
- dist_layer->setOutputBuffers({std::make_shared<Var_Grad>(out_var)});
+ RunLayerContext dist_context(context.getName(), context.getLoss(),
+ getWeightsForContext(), {&in_var}, {&out_var},
+ getTensorsForContext());
+
+ dist_layer->calcGradient(dist_context);
+ }
+ clearFromContext();
+}
+
+void TimeDistLayer::fillLayerInitContext(InitLayerContext &context,
+ const InitLayerContext &dist_context) {
+ /** real setting of the input flags */
+ auto const &input_dims = context.getInputDimensions();
+ for (unsigned int idx = 0; idx < dist_context.getNumInputs(); idx++) {
+ context.setDynDimFlagInputDimension(idx, input_dims[idx].getDynDimFlag());
+ context.setEffDimFlagInputDimension(idx, input_dims[idx].getEffDimFlag());
+ }
+
+ /** real request of tensors */
+ for (auto const &ts : dist_context.getTensorsSpec())
+ context.requestTensor(ts);
+
+ /** real request of weights */
+ for (auto const &ws : dist_context.getWeightsSpec())
+ context.requestWeight(ws);
+}
+
+void TimeDistLayer::setBatch(RunLayerContext &context, unsigned int batch) {
+ if (context.getNumTensors() > 0) {
+ const TensorDim &out_dim = context.getOutput(SINGLE_INOUT_IDX).getDim();
+ const TensorDim &in_dim = context.getInput(SINGLE_INOUT_IDX).getDim();
+
+ TensorDim i_dim = {in_dim[2], 1, 1, in_dim[3]};
+ TensorDim o_dim = {out_dim[2], 1, 1, out_dim[3]};
+
+ Var_Grad in_var(i_dim, true, false, context.getName() + ":input");
+ Var_Grad out_var(o_dim, true, false, context.getName() + ":output");
- dist_layer->calcGradient();
+ fillWeightsFromContext(context);
+ fillTensorsFromContext(context);
+
+ RunLayerContext dist_context(context.getName(), context.getLoss(),
+ getWeightsForContext(), {&in_var}, {&out_var},
+ getTensorsForContext());
+
+ dist_layer->setBatch(dist_context, batch);
+
+ for (unsigned int idx = 0; idx < dist_context.getNumTensors(); idx++) {
+ context.updateTensor(idx, dist_context.getTensor(idx).getDim().batch());
+ }
+
+ clearFromContext();
+ }
+}
+
+void TimeDistLayer::setBatch(InitLayerContext &context, unsigned int batch) {
+ TensorDim input_dim = context.getInputDimensions()[SINGLE_INOUT_IDX];
+ input_dim.height(1);
+ InitLayerContext dist_context({input_dim}, context.getNumOutputs());
+
+ TensorDim output_dim = context.getOutputDimensions()[0];
+ // input_dim.height is the number of time iterations
+ output_dim.height(1);
+ dist_context.setOutputDimensions({output_dim});
+
+ /** create dist_context using the context */
+ fillLayerInitContext(dist_context, context);
+ if (context.getNumTensors() > 0) {
+ dist_layer->setBatch(dist_context, batch);
+
+ auto const &tensors_spec = dist_context.getTensorsSpec();
+ for (unsigned int idx = 0; idx < dist_context.getNumTensors(); idx++) {
+ context.updateTensorSpec(idx, std::get<0>(tensors_spec[idx]).batch());
+ }
}
}
#define __TIME_DIST_H__
#ifdef __cplusplus
-#include <layer_internal.h>
-#include <tensor.h>
+#include <layer_devel.h>
namespace nntrainer {
* @class TimeDistLayer
* @brief Time Distribution Layer
*/
-class TimeDistLayer : public LayerV1 {
+class TimeDistLayer : public Layer {
public:
/**
* @brief Constructor of Time Distribution Layer
*/
- template <typename... Args> TimeDistLayer(Args... args) : LayerV1(args...) {
+ TimeDistLayer() : Layer() {
for (unsigned int i = 0; i < 4; ++i) {
positions[i] = nullptr;
}
TimeDistLayer &operator=(TimeDistLayer &&rhs) = default;
/**
- * @copydoc Layer::forwarding(bool training)
+ * @copydoc Layer::finalize(InitLayerContext &context)
*/
- void forwarding(bool training = true) override;
+ void finalize(InitLayerContext &context) override;
/**
- * @copydoc Layer::calcDerivative()
+ * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
*/
- void calcDerivative() override;
+ void forwarding(RunLayerContext &context, bool training) override;
/**
- * @copydoc Layer::calcGradient()
+ * @copydoc Layer::calcDerivative(RunLayerContext &context)
*/
- void calcGradient() override;
+ void calcDerivative(RunLayerContext &context) override;
/**
- * @brief copy layer
- * @param[in] l layer to copy
+ * @copydoc Layer::calcGradient(RunLayerContext &context)
*/
- void copy(std::shared_ptr<LayerV1> l) override;
+ void calcGradient(RunLayerContext &context) override;
/**
- * @brief initialize layer
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+ * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
*/
- int initialize(Manager &manager) override;
+ void exportTo(Exporter &exporter,
+ const ExportMethods &method) const override {
+ dist_layer->exportTo(exporter, method);
+ }
+
+ /**
+ * @copydoc Layer::getType()
+ */
+ const std::string getType() const override { return TimeDistLayer::type; };
+
+ /**
+ * @copydoc Layer::supportBackwarding()
+ */
+ bool supportBackwarding() const { return dist_layer->supportBackwarding(); }
+
+ /**
+ * @copydoc Layer::setBatch(InitLayerContext &context, unsigned int batch)
+ */
+ void setBatch(InitLayerContext &context, unsigned int batch) override;
+
+ /**
+ * @copydoc Layer::setBatch(RunLayerContext &context, unsigned int batch)
+ */
+ void setBatch(RunLayerContext &context, unsigned int batch) override;
+
+ /**
+ * @copydoc Layer::setProperty(const PropertyType type, const std::string
+ * &value)
+ */
+ void setProperty(const std::vector<std::string> &values) override {
+ /**
+ * @note assumption: name of the dist_layer is set via setName() and not
+ * with setProperty()
+ */
+ if (!values.empty())
+ dist_layer->setProperty(values);
+ }
+
+ /**
+ * @copydoc Layer::supportInPlace()
+ */
+ virtual bool supportInPlace() const { return false; }
+
+ /**
+ * @copydoc Layer::requireLabel()
+ */
+ virtual bool requireLabel() const { return dist_layer->requireLabel(); }
/**
* @brief set distribute layer
* @param[in] l layer to distribute along time
*/
- void setDistLayer(std::shared_ptr<LayerV1> l);
+ void setDistLayer(std::unique_ptr<Layer> &&l) { dist_layer = std::move(l); }
/**
- * @brief get distribute layer type
- * @retval layer type
+ * @brief get distribute layer
+ * @retval dist_layer pointer to the distributed layer
*/
- std::string getDistLayerType() { return dist_layer->getType(); }
+ Layer *getDistLayer() { return dist_layer.get(); };
/**
* @brief get distribute layer
* @retval dist_layer const pointer to the distributed layer
*/
- std::shared_ptr<LayerV1> &getDistLayer() { return dist_layer; };
+ const Layer *getDistLayer() const { return dist_layer.get(); };
+
+ inline static const std::string type = "time_dist";
+private:
/**
- * @copydoc Layer::requireLabe()
+ * @brief Layer to be distributed through time
+ */
+ std::unique_ptr<Layer> dist_layer;
+ /**
+  * @brief weights/tensors wrapped from the outer context, used to build the
+  * per-timestep run context for dist_layer
+  */
+ std::vector<Weight> weights_wrapper;
+ std::vector<Var_Grad> tensors_wrapper;
+
+ /**
+ * @brief pointer value of each input/output tensors to compare position
+ */
+ float *positions[4];
+
+ /**
+ * @brief Transpose the input and output tensors to avoid duplication
+ * because of memory optimization.
+ * It transposes net_input.getVariableRef, net_input.getGradientRef,
+ * net_hidden.getVariableRef and net_hidden.getGradientRef.
+ *
+ * @param context Run layer context
*/
- bool requireLabel() const override { return dist_layer->requireLabel(); }
+ void transposeInOut(RunLayerContext &context);
/**
* @brief get transposed Tensor according to time iteration axis
* @param[in] m Tensor
* @retval Tensor transposed Tensor
*/
- Tensor transposeTensor(Tensor &m);
+ static Tensor transposeTensor(Tensor &m);
/**
* @brief calculate the pointer of each input and output tensors
+ *
+ * @param context Run layer context
*/
- void setPosition();
+ void setPosition(RunLayerContext &context);
/**
- * @brief Transpose Input and Output Tensors to avoid duplicatation becuase
- * of memory optimization
- * It transpose the net_input.getVariableRef, net_input.getGradientRef,
- * net_hidden.getVariableRef and net_hidden.getGradientRef.
+ * @brief Fill weights from the given context
+ *
+ * @param context The given context
*/
- void transposeInOut();
-
- using LayerV1::setProperty;
+ void fillWeightsFromContext(RunLayerContext &context);
/**
- * @copydoc Layer::setProperty(const PropertyType type, const std::string
- * &value)
+ * @brief Get the Weights for Context object
+ *
+ * @return std::vector<Weight *> The list of weights
*/
- void setProperty(const PropertyType type,
- const std::string &value = "") override {
- /**
- * @note assumption: name of the dist_layer is set via setName() and not
- * with setProperty()
- */
- dist_layer->setProperty(type, value);
- }
+ std::vector<Weight *> getWeightsForContext();
/**
- * @copydoc Layer::getType()
+ * @brief Fill tensors from the given context
+ *
+ * @param context The given context
*/
- const std::string getType() const override { return TimeDistLayer::type; };
+ void fillTensorsFromContext(RunLayerContext &context);
- inline static const std::string type = "time_dist";
+ /**
+ * @brief Get the Tensors for Context object
+ *
+ * @return std::vector<Var_Grad *> The list of tensors
+ */
+ std::vector<Var_Grad *> getTensorsForContext();
-private:
/**
- * @brief Layer to be distributed through time
+ * @brief Clean the values filled from context
+ *
+ * @note This is necessary to ensure that all the references to the stored
+ * tensors are cleared for the memory to be released after run is complete.
+ *
*/
- std::shared_ptr<LayerV1> dist_layer;
+ void clearFromContext() {
+ weights_wrapper.clear();
+ tensors_wrapper.clear();
+ }
/**
- * @brief pointer value of each input/output tensors to compare position
+ * @brief Fill init context from the given dist context
+ *
+ * @param context context to be set/filled
+ * @param dist_context context from which to be filled
*/
- float *positions[4];
+ void fillLayerInitContext(InitLayerContext &context,
+ const InitLayerContext &dist_context);
};
} // namespace nntrainer
dyn_dim_flag = dim_flag_;
}
+ /**
+ * @brief Get the Dim Flag to retrieve effective dimension
+ * @note eg) if dimension 4:1:10:1 should be squeezed to 4:10,
+ * set this to 0b1010, rightmost is width
+ *
+ * @return dim_flag_ dimension bit to calculate, rightmost is width
+ */
+ const std::bitset<MAXDIM> &getEffDimFlag() const { return eff_dim_flag; }
+
+ /**
+ * @brief Get the dynamic Dim Flag to retrieve the dynamic dimension (which
+ * can change at runtime)
+ * @note eg) if dimension 4:1:10:1 should be dynamic with respect to batch,
+ * set this to 0b1000, rightmost is width
+ * @note when setting dynamic dimension, the calculation must remain
+ * independent of the dynamic dimension. Please check this :)
+ *
+ * @return dim_flag_ dimension bit to calculate, rightmost is width
+ */
+ const std::bitset<MAXDIM> &getDynDimFlag() const { return dyn_dim_flag; }
+
/**
* @brief swap variable of Conv2D Layer
* @parma[out] lhs Optimizer
INSTANTIATE_TEST_CASE_P(
nntrainerIniAutoTests, nntrainerIniTest, ::testing::Values(
/**< positive: basic valid scenarios (2 positive and 3 negative cases) */
- mkIniTc("basic_p", {nw_base_mse, adam, input + "-Activation", out+"input_layers=inputlayer" + "-Activation"}, SUCCESS),
- mkIniTc("basic2_p", {nw_base_mse, sgd, input + "-Activation", out+"input_layers=inputlayer" + "-Activation"}, SUCCESS),
- mkIniTc("basic3_p", {nw_base + "loss=cross_sigmoid", adam, input + "-Activation", out+"input_layers=inputlayer" + "-Activation"}, SUCCESS),
- mkIniTc("basic4_p", {nw_base + "loss=cross_softmax", adam, input + "-Activation", out+"input_layers=inputlayer" + "-Activation"}, SUCCESS),
- mkIniTc("basic5_p", {nw_base_cross, adam, input, out+"input_layers=inputlayer"}, SUCCESS),
- mkIniTc("basic6_p", {nw_base_cross, sgd, input, out+"input_layers=inputlayer"}, SUCCESS),
- mkIniTc("basic_act_p", {nw_base_cross, sgd, input + "-Activation", act_relu+"input_layers=inputlayer", out+"input_layers=activation_relu" }, SUCCESS),
- mkIniTc("basic_bn_p", {nw_base_cross, sgd, input + "-Activation", batch_normal+"input_layers=inputlayer", act_relu+"input_layers=bn", out+"input_layers=activation_relu" }, SUCCESS),
- mkIniTc("basic_bn2_p", {nw_base_cross, sgd, input + "-Activation", batch_normal + "Activation = relu"+"input_layers=inputlayer", out+"input_layers=bn" }, SUCCESS),
- mkIniTc("basic_dataset_p", {nw_base_cross, adam, dataset, input, out+"input_layers=inputlayer"}, SUCCESS),
- mkIniTc("basic_dataset2_p", {nw_base_cross, sgd, input, out+"input_layers=inputlayer", dataset}, SUCCESS),
- mkIniTc("basic_dataset3_p", {dataset, nw_base_cross, sgd, input, out+"input_layers=inputlayer"}, SUCCESS),
- mkIniTc("basic_conv2d_p", {nw_base_cross, adam, conv2d + "input_shape = 1:10:10"}, SUCCESS),
- mkIniTc("no_testSet_p", {nw_base_cross, adam, dataset + "-TestData", input, out+"input_layers=inputlayer"}, SUCCESS),
- mkIniTc("no_validSet_p", {nw_base_cross, adam, dataset + "-ValidData", input, out+"input_layers=inputlayer"}, SUCCESS),
- mkIniTc("no_bufferSize_p", {nw_base_cross, adam, dataset + "-BufferSize", input, out+"input_layers=inputlayer"}, SUCCESS),
- mkIniTc("buffer_size_smaller_than_batch_size_p", {nw_base_cross, adam, dataset + "BufferSize=26", input, out+"input_layers=inputlayer"}, SUCCESS),
- mkIniTc("buffer_size_smaller_than_batch_size2_p", {nw_base_cross, adam, input, out+"input_layers=inputlayer", dataset + "BufferSize=26"}, SUCCESS),
- mkIniTc("loss_layer1_p", {nw_base, adam, input + "-Activation", out + "-Activation", loss_mse}, SUCCESS),
- mkIniTc("loss_layer2_p", {nw_base, adam, input + "-Activation", out, loss_mse}, SUCCESS),
- mkIniTc("loss_layer3_n", {nw_base, adam, input + "-Activation", out + "-Activation", loss_cross}, INITFAIL | COMPFAIL),
- mkIniTc("loss_layer4_p", {nw_base, adam, input + "-Activation", out, loss_cross}, SUCCESS),
- mkIniTc("loss_layer5_p", {nw_base, adam, input + "-Activation", out + "-Activation", loss_cross_sigmoid}, SUCCESS),
- mkIniTc("loss_layer6_p", {nw_base, adam, input + "-Activation", out, loss_cross_sigmoid}, SUCCESS),
- mkIniTc("loss_layer7_p", {nw_base, adam, input + "-Activation", out + "-Activation", loss_cross_softmax}, SUCCESS),
- mkIniTc("loss_layer8_p", {nw_base, adam, input + "-Activation", out, loss_cross_softmax}, SUCCESS),
+ mkIniTc("basic_p", {nw_base_mse, adam, input + "-Activation", out+"input_layers=inputlayer" + "-Activation"}, SUCCESS),
+ mkIniTc("basic2_p", {nw_base_mse, sgd, input + "-Activation", out+"input_layers=inputlayer" + "-Activation"}, SUCCESS),
+ mkIniTc("basic3_p", {nw_base + "loss=cross_sigmoid", adam, input + "-Activation", out+"input_layers=inputlayer" + "-Activation"}, SUCCESS),
+ mkIniTc("basic4_p", {nw_base + "loss=cross_softmax", adam, input + "-Activation", out+"input_layers=inputlayer" + "-Activation"}, SUCCESS),
+ mkIniTc("basic5_p", {nw_base_cross, adam, input, out+"input_layers=inputlayer"}, SUCCESS),
+ mkIniTc("basic6_p", {nw_base_cross, sgd, input, out+"input_layers=inputlayer"}, SUCCESS),
+ mkIniTc("basic_act_p", {nw_base_cross, sgd, input + "-Activation", act_relu+"input_layers=inputlayer", out+"input_layers=activation_relu" }, SUCCESS),
+ mkIniTc("basic_bn_p", {nw_base_cross, sgd, input + "-Activation", batch_normal+"input_layers=inputlayer", act_relu+"input_layers=bn", out+"input_layers=activation_relu" }, SUCCESS),
+ mkIniTc("basic_bn2_p", {nw_base_cross, sgd, input + "-Activation", batch_normal + "Activation = relu"+"input_layers=inputlayer", out+"input_layers=bn" }, SUCCESS),
+ mkIniTc("basic_dataset_p", {nw_base_cross, adam, dataset, input, out+"input_layers=inputlayer"}, SUCCESS),
+ mkIniTc("basic_dataset2_p", {nw_base_cross, sgd, input, out+"input_layers=inputlayer", dataset}, SUCCESS),
+ mkIniTc("basic_dataset3_p", {dataset, nw_base_cross, sgd, input, out+"input_layers=inputlayer"}, SUCCESS),
+ mkIniTc("basic_conv2d_p", {nw_base_cross, adam, conv2d + "input_shape = 1:10:10"}, SUCCESS),
+ mkIniTc("no_testSet_p", {nw_base_cross, adam, dataset + "-TestData", input, out+"input_layers=inputlayer"}, SUCCESS),
+ mkIniTc("no_validSet_p", {nw_base_cross, adam, dataset + "-ValidData", input, out+"input_layers=inputlayer"}, SUCCESS),
+ mkIniTc("no_bufferSize_p", {nw_base_cross, adam, dataset + "-BufferSize", input, out+"input_layers=inputlayer"}, SUCCESS),
+ mkIniTc("buffer_size_smaller_than_batch_size_p", {nw_base_cross, adam, dataset + "BufferSize=26", input, out+"input_layers=inputlayer"}, SUCCESS),
+ mkIniTc("buffer_size_smaller_than_batch_size2_p", {nw_base_cross, adam, input, out+"input_layers=inputlayer", dataset + "BufferSize=26"}, SUCCESS),
+ mkIniTc("loss_layer1_p", {nw_base, adam, input + "-Activation", out + "-Activation", loss_mse}, SUCCESS),
+ mkIniTc("loss_layer2_p", {nw_base, adam, input + "-Activation", out, loss_mse}, SUCCESS),
+ mkIniTc("loss_layer3_n", {nw_base, adam, input + "-Activation", out + "-Activation", loss_cross}, INITFAIL | COMPFAIL),
+ mkIniTc("loss_layer4_p", {nw_base, adam, input + "-Activation", out, loss_cross}, SUCCESS),
+ mkIniTc("loss_layer5_p", {nw_base, adam, input + "-Activation", out + "-Activation", loss_cross_sigmoid}, SUCCESS),
+ mkIniTc("loss_layer6_p", {nw_base, adam, input + "-Activation", out, loss_cross_sigmoid}, SUCCESS),
+ mkIniTc("loss_layer7_p", {nw_base, adam, input + "-Activation", out + "-Activation", loss_cross_softmax}, SUCCESS),
+ mkIniTc("loss_layer8_p", {nw_base, adam, input + "-Activation", out, loss_cross_softmax}, SUCCESS),
/**< half negative: init fail cases (1 positive and 4 negative cases) */
mkIniTc("unknown_loss_n", {nw_base_cross + "loss = unknown", adam, input, out+"input_layers=inputlayer"}, COMPFAIL | INITFAIL),
/**
* @brief Ini file unittest with distributed layer
*/
-TEST(nntrainerIniTest, DISABLED_distribute_p_01) {
+TEST(nntrainerIniTest, distribute_p_01) {
ScopedIni s{
"distribute_p1",
{nw_base_cross, adam,
mkModelTc(addition_resnet_like, "3:1:1:10", 10),
/// #1192 time distribution inference bug
- // mkModelTc(fc_softmax_mse_distribute_validate, "3:1:5:3", 1),
- // mkModelTc(fc_softmax_cross_distribute_validate, "3:1:5:3", 1),
- // mkModelTc(fc_sigmoid_cross_distribute_validate, "3:1:5:3", 1)
+ mkModelTc(fc_softmax_mse_distribute_validate, "3:1:5:3", 1),
+ mkModelTc(fc_softmax_cross_distribute_validate, "3:1:5:3", 1),
+ mkModelTc(fc_sigmoid_cross_distribute_validate, "3:1:5:3", 1),
mkModelTc(lstm_basic, "1:1:1:1", 10),
mkModelTc(lstm_return_sequence, "1:1:2:1", 10),
mkModelTc(lstm_return_sequence_with_batch, "2:1:2:1", 10),