Update batch normalization layer to the layer v2 style.
Add the corresponding common unit tests, and re-enable the previously
disabled modelfile and models unit tests.
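
For reference, the v2 pattern adopted throughout this patch is roughly the
following. This is an illustrative sketch only: "ExampleLayer" and "w_idx"
are placeholders, and the calls shown mirror the InitLayerContext /
RunLayerContext APIs used by bn_layer.cpp below.

    // finalize() declares tensors through the InitLayerContext; forwarding()
    // fetches them back by index from the RunLayerContext.
    void ExampleLayer::finalize(InitLayerContext &context) {
      auto const &in_dim = context.getInputDimensions()[0];
      context.setOutputDimensions(context.getInputDimensions());
      w_idx = context.requestWeight(in_dim, WeightInitializer::WEIGHT_ONES,
                                    WeightRegularizer::NONE, 1.0f,
                                    "ExampleLayer::w", true);
    }

    void ExampleLayer::forwarding(RunLayerContext &context, bool training) {
      Tensor &w = context.getWeight(w_idx);
      Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
      Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
      input_.multiply(w, hidden_); // stand-in for the per-layer computation
    }

Properties are now parsed through setProperty(const std::vector<std::string> &)
instead of the enum-based LayerV1 overload, and weights are addressed by the
indices returned from requestWeight() rather than through a layer-owned
weights vector.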
Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
*
*/
-#include <assert.h>
#include <bn_layer.h>
#include <layer_internal.h>
#include <lazy_tensor.h>
namespace nntrainer {
+static constexpr size_t SINGLE_INOUT_IDX = 0;
+
enum BNParams { mu, var, gamma, beta };
/// @todo add multiple axis support
-int BatchNormalizationLayer::initialize(Manager &manager) {
- int status = ML_ERROR_NONE;
-
- if (getNumInputs() != 1) {
+void BatchNormalizationLayer::finalize(InitLayerContext &context) {
+ if (context.getNumInputs() != 1) {
throw std::invalid_argument(
"Only one input is allowed for batch normalization layer");
}
- output_dim[0] = input_dim[0];
+
+ /** set output dimensions */
+ auto const &in_dim = context.getInputDimensions()[0];
+ context.setOutputDimensions(context.getInputDimensions());
+
TensorDim dim;
/// @note this logic cannot tell whether channel is actually 1 or it is simply not used.
if (axis == -1)
- axis = input_dim[0].channel() > 1 ? 1 : 3;
+ axis = in_dim.channel() > 1 ? 1 : 3;
- dim.setTensorDim(axis, input_dim[0].getTensorDim(axis));
+ dim.setTensorDim(axis, in_dim.getTensorDim(axis));
for (int i = 0; i < 4; ++i) {
if (axis != i)
axes_to_reduce.push_back(i);
}
- weights.clear();
- if (weights.empty()) {
- weights.reserve(4);
- weights.emplace_back(dim, initializers[BNParams::mu],
- WeightRegularizer::NONE, 1.0f, false, false,
- "BN::moving_mean");
- weights.emplace_back(dim, initializers[BNParams::var],
- WeightRegularizer::NONE, 1.0f, false, false,
- "BN::moving_variance");
- weights.emplace_back(dim, initializers[BNParams::gamma],
- WeightRegularizer::NONE, 1.0f, true, false,
- "BN::gamma");
- weights.emplace_back(dim, initializers[BNParams::beta],
- WeightRegularizer::NONE, 1.0f, true, false,
- "BN::beta");
- manager.trackWeights(weights);
- } else {
- weights[BNParams::mu].reset(dim, initializers[BNParams::mu],
- WeightRegularizer::NONE, 1.0f, false);
- weights[BNParams::var].reset(dim, initializers[BNParams::var],
- WeightRegularizer::NONE, 1.0f, false);
- weights[BNParams::gamma].reset(dim, initializers[BNParams::gamma],
- WeightRegularizer::NONE, 1.0f, true);
- weights[BNParams::beta].reset(dim, initializers[BNParams::beta],
- WeightRegularizer::NONE, 1.0f, true);
- }
+ weight_idx[BNParams::mu] = context.requestWeight(
+ dim, initializers[BNParams::mu], WeightRegularizer::NONE, 1.0f,
+ "BN::moving_mean", false);
+ weight_idx[BNParams::var] = context.requestWeight(
+ dim, initializers[BNParams::var], WeightRegularizer::NONE, 1.0f,
+ "BN::moving_variance", false);
+ weight_idx[BNParams::gamma] =
+ context.requestWeight(dim, initializers[BNParams::gamma],
+ WeightRegularizer::NONE, 1.0f, "BN::gamma", true);
+ weight_idx[BNParams::beta] =
+ context.requestWeight(dim, initializers[BNParams::beta],
+ WeightRegularizer::NONE, 1.0f, "BN::beta", true);
+}
+
+void BatchNormalizationLayer::setProperty(
+ const std::vector<std::string> &values) {
+ /// @todo: deprecate this in favor of loadProperties
+ for (unsigned int i = 0; i < values.size(); ++i) {
+ std::string key;
+ std::string value;
+ std::stringstream ss;
+
+ if (getKeyValue(values[i], key, value) != ML_ERROR_NONE) {
+ throw std::invalid_argument("Error parsing the property: " + values[i]);
+ }
+
+ if (value.empty()) {
+ ss << "value is empty: key: " << key << ", value: " << value;
+ throw std::invalid_argument(ss.str());
+ }
- return status;
+ /// @note this calls derived setProperty if available
+ setProperty(key, value);
+ }
}
-void BatchNormalizationLayer::setProperty(const PropertyType type,
+void BatchNormalizationLayer::setProperty(const std::string &type_str,
const std::string &value) {
+ using PropertyType = LayerV1::PropertyType;
int status = ML_ERROR_NONE;
+ LayerV1::PropertyType type =
+ static_cast<LayerV1::PropertyType>(parseLayerProperty(type_str));
+
switch (type) {
case PropertyType::epsilon:
- if (!value.empty()) {
- status = setFloat(epsilon, value);
- throw_status(status);
- }
+ status = setFloat(epsilon, value);
+ throw_status(status);
break;
case PropertyType::moving_mean_initializer:
- if (!value.empty()) {
- initializers[BNParams::mu] =
- (WeightInitializer)parseType(value, TOKEN_WEIGHT_INIT);
- }
+ initializers[BNParams::mu] =
+ (WeightInitializer)parseType(value, TOKEN_WEIGHT_INIT);
break;
case PropertyType::moving_variance_initializer:
- if (!value.empty()) {
- initializers[BNParams::var] =
- (WeightInitializer)parseType(value, TOKEN_WEIGHT_INIT);
- }
+ initializers[BNParams::var] =
+ (WeightInitializer)parseType(value, TOKEN_WEIGHT_INIT);
break;
case PropertyType::beta_initializer:
- if (!value.empty()) {
- initializers[BNParams::beta] =
- (WeightInitializer)parseType(value, TOKEN_WEIGHT_INIT);
- }
+ initializers[BNParams::beta] =
+ (WeightInitializer)parseType(value, TOKEN_WEIGHT_INIT);
break;
case PropertyType::gamma_initializer:
- if (!value.empty()) {
- initializers[BNParams::gamma] =
- (WeightInitializer)parseType(value, TOKEN_WEIGHT_INIT);
- }
+ initializers[BNParams::gamma] =
+ (WeightInitializer)parseType(value, TOKEN_WEIGHT_INIT);
break;
case PropertyType::momentum:
- if (!value.empty()) {
- status = setFloat(momentum, value);
- throw_status(status);
- }
+ status = setFloat(momentum, value);
+ throw_status(status);
break;
default:
- LayerV1::setProperty(type, value);
- break;
+ std::string msg =
+ "[BatchNormalizationLayer] Unknown Layer Property Key for value " +
+ std::string(value);
+ throw exception::not_supported(msg);
}
}
-void BatchNormalizationLayer::forwarding(bool training) {
- Tensor &mu = weightAt(BNParams::mu).getVariableRef();
- Tensor &var = weightAt(BNParams::var).getVariableRef();
- Tensor &gamma = weightAt(BNParams::gamma).getVariableRef();
- Tensor &beta = weightAt(BNParams::beta).getVariableRef();
+void BatchNormalizationLayer::forwarding(RunLayerContext &context,
+ bool training) {
+ Tensor &mu = context.getWeight(weight_idx[BNParams::mu]);
+ Tensor &var = context.getWeight(weight_idx[BNParams::var]);
+ Tensor &gamma = context.getWeight(weight_idx[BNParams::gamma]);
+ Tensor &beta = context.getWeight(weight_idx[BNParams::beta]);
- Tensor &input_ = net_input[0]->getVariableRef();
- Tensor &hidden_ = net_hidden[0]->getVariableRef();
+ Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
+ Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
if (training) {
+ /**
+ * @todo support average with preallocated tensors,
+ * and then register cmu as a temporary tensor
+ */
Tensor cmu = input_.average(axes_to_reduce);
deviation = input_.subtract(cmu);
hidden_.add_i(beta);
}
-void BatchNormalizationLayer::calcDerivative() {
+void BatchNormalizationLayer::calcDerivative(RunLayerContext &context) {
- Tensor &gamma = weightAt(BNParams::gamma).getVariableRef();
- Tensor &deriv = net_hidden[0]->getGradientRef();
+ Tensor &gamma = context.getWeight(weight_idx[BNParams::gamma]);
+ Tensor &deriv = context.getIncomingDerivative(SINGLE_INOUT_IDX);
+ Tensor &dx = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
int N = 1;
+ const Tensor &input = context.getInput(SINGLE_INOUT_IDX);
+ const TensorDim &in_dim = input.getDim();
for (auto &axis : axes_to_reduce) {
- N *= input_dim[0].getTensorDim(axis);
+ N *= in_dim.getTensorDim(axis);
}
Tensor dx_1 = gamma.multiply(invstd);
dx_2.subtract_i(deviation.divide(cvar).multiply(
deviation.multiply(deriv).sum(axes_to_reduce)));
- Tensor &dx = net_input[0]->getGradientRef();
dx = dx_2.multiply(dx_1, dx);
dx.divide_i(N);
}
-void BatchNormalizationLayer::calcGradient() {
+void BatchNormalizationLayer::calcGradient(RunLayerContext &context) {
- Tensor &dgamma = weightAt(BNParams::gamma).getGradientRef();
- Tensor &dbeta = weightAt(BNParams::beta).getGradientRef();
- Tensor &deriv = net_hidden[0]->getGradientRef();
+ Tensor &dgamma = context.getWeightGrad(weight_idx[BNParams::gamma]);
+ Tensor &dbeta = context.getWeightGrad(weight_idx[BNParams::beta]);
+ Tensor &deriv = context.getIncomingDerivative(SINGLE_INOUT_IDX);
dbeta = deriv.sum(axes_to_reduce);
Tensor dev = deviation.multiply(invstd);
dgamma = dev.sum(axes_to_reduce);
}
-void BatchNormalizationLayer::copy(std::shared_ptr<LayerV1> l) {
- LayerV1::copy(l);
-
- std::shared_ptr<BatchNormalizationLayer> from =
- std::static_pointer_cast<BatchNormalizationLayer>(l);
- this->cvar.copy(from->cvar);
-}
-
} /* namespace nntrainer */
#include <functional>
#include <vector>
-#include <layer_internal.h>
-#include <tensor.h>
+#include <layer_devel.h>
namespace nntrainer {
* @class BatchNormalizationLayer
* @brief Batch Normalization Layer
*/
-class BatchNormalizationLayer : public LayerV1 {
+class BatchNormalizationLayer : public Layer {
public:
/**
* @brief Constructor of Batch Normalization Layer
*/
- template <typename... Args>
BatchNormalizationLayer(
int axis = -1, float momentum = 0.99, float epsilon = 0.001,
WeightInitializer moving_mean_initializer = WeightInitializer::WEIGHT_ZEROS,
WeightInitializer moving_variance_initializer =
WeightInitializer::WEIGHT_ZEROS,
WeightInitializer gamma_initializer = WeightInitializer::WEIGHT_ONES,
- WeightInitializer beta_initializer = WeightInitializer::WEIGHT_ONES,
- Args... args) :
- LayerV1(args...),
+ WeightInitializer beta_initializer = WeightInitializer::WEIGHT_ONES) :
+ Layer(),
epsilon(epsilon),
momentum(momentum),
axis(axis),
BatchNormalizationLayer &operator=(BatchNormalizationLayer &&rhs) = default;
/**
- * @copydoc Layer::forwarding(bool training)
+ * @copydoc Layer::finalize(InitLayerContext &context)
*/
- void forwarding(bool training = true) override;
+ void finalize(InitLayerContext &context) override;
/**
- * @copydoc Layer::calcDerivative()
+ * @copydoc Layer::forwarding(RunLayerContext &context, bool training)
*/
- void calcDerivative() override;
+ void forwarding(RunLayerContext &context, bool training) override;
/**
- * @copydoc Layer::calcGradient()
+ * @copydoc Layer::calcDerivative(RunLayerContext &context)
*/
- void calcGradient() override;
+ void calcDerivative(RunLayerContext &context) override;
/**
- * @brief copy layer
- * @param[in] l layer to copy
+ * @copydoc Layer::calcGradient(RunLayerContext &context)
*/
- void copy(std::shared_ptr<LayerV1> l) override;
+ void calcGradient(RunLayerContext &context) override;
/**
- * @brief initialize layer
- * @retval #ML_ERROR_NONE Successful.
- * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+ * @copydoc Layer::exportTo(Exporter &exporter, ExportMethods method)
*/
- int initialize(Manager &manager) override;
+ void exportTo(Exporter &exporter,
+ const ExportMethods &method) const override {
+ Layer::exportTo(exporter, method);
+ }
/**
* @copydoc Layer::getType()
*/
const std::string getType() const override {
return BatchNormalizationLayer::type;
  }
/**
- * @copydoc Layer::supportInPlace()
+ * @copydoc Layer::supportBackwarding()
*/
- bool supportInPlace() const override { return true; }
+  bool supportBackwarding() const override { return true; }
- using LayerV1::setProperty;
+ using Layer::setProperty;
/**
-   * @copydoc Layer::setProperty(const PropertyType type, const std::string
-   * &value)
+   * @copydoc Layer::setProperty(const std::vector<std::string> &values)
*/
- void setProperty(const PropertyType type,
- const std::string &value = "") override;
+ void setProperty(const std::vector<std::string> &values) override;
+
+ /**
+ * @copydoc Layer::supportInPlace()
+ */
+ bool supportInPlace() const override { return true; }
inline static const std::string type = "batch_normalization";
std::vector<unsigned int> axes_to_reduce; /**< target axes to reduce */
std::array<WeightInitializer, 4> initializers; /**< weight initializers */
+ std::array<unsigned int, 4> weight_idx; /**< indices of the weights */
+
+ /**
+ * @brief setProperty by type and value separated
+ * @param[in] type property type to be passed
+ * @param[in] value value to be passed
+ * @exception exception::not_supported when property type is not valid for
+ * the particular layer
+ * @exception std::invalid_argument invalid argument
+ */
+ void setProperty(const std::string &type, const std::string &value);
};
} // namespace nntrainer
*/
#include <fc_layer.h>
-#include <layer_internal.h>
#include <lazy_tensor.h>
#include <nntrainer_error.h>
#include <nntrainer_log.h>
+#include <node_exporter.h>
#include <parse_util.h>
#include <util_func.h>
void FullyConnectedLayer::exportTo(Exporter &exporter,
const ExportMethods &method) const {
- Layer::exportTo(exporter, method);
+ LayerImpl::exportTo(exporter, method);
exporter.saveResult(fc_props, method, this);
}
inline static const std::string type = "fully_connected";
private:
- std::tuple<props::Unit> fc_props;
- std::array<unsigned int, 2> weight_idx;
+ std::tuple<props::Unit>
+ fc_props; /**< fc layer properties : unit - number of output neurons */
+ std::array<unsigned int, 2> weight_idx; /**< indices of the weights */
};
} // namespace nntrainer
} break;
default:
std::string msg =
- "[Layer] Unknown Layer Property Key for value " + std::string(value);
+ "[InputLayer] Unknown Layer Property Key for value " + std::string(value);
throw exception::not_supported(msg);
}
}
return weights[idx]->getName();
}
+ /**
+ * @brief check if the weight has gradient
+ *
+ * @param idx Identifier of the weight
+ * @return true if weight has gradient, else false
+ */
+ bool weightHasGradient(unsigned int idx) const {
+ return weights[idx]->getTrainable();
+ }
+
/**
* @brief Get the Output tensor object
*
*/
Weight getWeightWrapper(unsigned int idx) {
if (layerv1 == nullptr) {
- return Weight(run_context.getWeight(idx), run_context.getWeightGrad(idx),
- run_context.getWeightName(idx));
+ if (run_context.weightHasGradient(idx)) {
+ return Weight(run_context.getWeight(idx),
+ run_context.getWeightGrad(idx),
+ run_context.getWeightName(idx));
+ } else {
+ return Weight(run_context.getWeight(idx), Tensor(),
+ run_context.getWeightName(idx));
+ }
} else {
return getLayer()->getWeightsRef()[idx];
}
* @author Jijoong Moon <jijoong.moon@samsung.com>
* @bug No known bugs except for NYI items
*
+ * @todo deprecate new tensor allocation for out of place operations.
*/
#ifndef __TENSOR_H__
const std::string &n = "") :
dim(v.getDim()),
var(std::make_shared<Tensor>(v.getSharedDataTensor(dim, 0, false))),
- grad(std::make_shared<Tensor>(g.getSharedDataTensor(dim, 0, false))),
+ grad(std::make_shared<Tensor>()),
need_gradient(!g.uninitialized()),
alloc_now(v.isAllocated()),
- name(n) {}
+ name(n) {
+    if (need_gradient)
+ grad = std::make_shared<Tensor>(g.getSharedDataTensor(dim, 0, false));
+ }
/**
* @brief Copy constructor for Var_Grad
'unittest_layers_input.cpp',
'unittest_layers_loss.cpp',
'unittest_layers_fully_connected.cpp',
+ 'unittest_layers_batch_normalization.cpp',
]
exe = executable(
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
+ *
+ * @file unittest_layers_batch_normalization.cpp
+ * @date 15 June 2021
+ * @brief Batch Normalization Layer Test
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Parichay Kapoor <pk.kapoor@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+#include <tuple>
+
+#include <gtest/gtest.h>
+
+#include <bn_layer.h>
+#include <layers_common_tests.h>
+
+auto semantic_bn = LayerSemanticsParamType(
+ nntrainer::createLayer<nntrainer::BatchNormalizationLayer>,
+ nntrainer::BatchNormalizationLayer::type, {}, {}, 0, false);
+
+INSTANTIATE_TEST_CASE_P(BatchNormalization, LayerSemantics,
+ ::testing::Values(semantic_bn));
mkIniTc("basic5_p", {nw_base_cross, adam, input, out+"input_layers=inputlayer"}, SUCCESS),
mkIniTc("basic6_p", {nw_base_cross, sgd, input, out+"input_layers=inputlayer"}, SUCCESS),
mkIniTc("basic_act_p", {nw_base_cross, sgd, input + "-Activation", act_relu+"input_layers=inputlayer", out+"input_layers=activation_relu" }, SUCCESS),
- // mkIniTc("basic_bn_p", {nw_base_cross, sgd, input + "-Activation", batch_normal+"input_layers=inputlayer", act_relu+"input_layers=bn", out+"input_layers=activation_relu" }, SUCCESS),
- // mkIniTc("basic_bn2_p", {nw_base_cross, sgd, input + "-Activation", batch_normal + "Activation = relu"+"input_layers=inputlayer", out+"input_layers=bn" }, SUCCESS),
+ mkIniTc("basic_bn_p", {nw_base_cross, sgd, input + "-Activation", batch_normal+"input_layers=inputlayer", act_relu+"input_layers=bn", out+"input_layers=activation_relu" }, SUCCESS),
+ mkIniTc("basic_bn2_p", {nw_base_cross, sgd, input + "-Activation", batch_normal + "Activation = relu"+"input_layers=inputlayer", out+"input_layers=bn" }, SUCCESS),
mkIniTc("basic_dataset_p", {nw_base_cross, adam, dataset, input, out+"input_layers=inputlayer"}, SUCCESS),
mkIniTc("basic_dataset2_p", {nw_base_cross, sgd, input, out+"input_layers=inputlayer", dataset}, SUCCESS),
mkIniTc("basic_dataset3_p", {dataset, nw_base_cross, sgd, input, out+"input_layers=inputlayer"}, SUCCESS),
nntrainerModelAutoTests, nntrainerModelTest, ::testing::Values(
mkModelTc(fc_sigmoid_mse, "3:1:1:10", 1),
mkModelTc(fc_sigmoid_cross, "3:1:1:10", 1),
- mkModelTc(fc_relu_mse, "3:1:1:2", 1)
- // mkModelTc(fc_bn_sigmoid_cross, "3:1:1:10", 10),
- // mkModelTc(fc_bn_sigmoid_mse, "3:1:1:10", 10),
+ mkModelTc(fc_relu_mse, "3:1:1:2", 1),
+ mkModelTc(fc_bn_sigmoid_cross, "3:1:1:10", 10),
+ mkModelTc(fc_bn_sigmoid_mse, "3:1:1:10", 10)
// mkModelTc(mnist_conv_cross, "3:1:1:10", 10),
// mkModelTc(mnist_conv_cross_one_input, "1:1:1:10", 10),