From fa831f211cfb2be503104fc11aa0b0fb49f78fba Mon Sep 17 00:00:00 2001
From: Parichay Kapoor
Date: Wed, 6 Oct 2021 16:32:47 +0900
Subject: [PATCH] [layer] Add checks for layer tensor overwrite bug

This patch adds checks to ensure that layer tensors, once created, are
not overwritten by re-assigning their tensor references. These checks
are compiled in only for test builds (ENABLE_TEST) so that they run in
CI, and are performed after each operation - forwarding, calcGradient,
and calcDerivative.

Signed-off-by: Parichay Kapoor
---
 Applications/Custom/pow.cpp                 |  6 +-
 nntrainer/layers/bn_layer.cpp               |  2 +-
 nntrainer/layers/conv2d_layer.cpp           |  2 +-
 nntrainer/layers/layer_context.cpp          | 99 +++++++++++++++++++++++++++++
 nntrainer/layers/layer_context.h            | 31 +++++++++
 nntrainer/layers/layer_node.cpp             | 18 ++++++
 nntrainer/tensor/tensor.cpp                 |  3 +-
 test/unittest/unittest_nntrainer_models.cpp |  2 +
 8 files changed, 157 insertions(+), 6 deletions(-)
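
Note: for illustration, the overwrite pattern these checks catch is the one
fixed in the pow layer below; the snippet is taken from that change in this
patch.

    // problematic: re-assigns the output tensor reference, so the run
    // context no longer tracks the tensor it originally created
    context.getOutput(SINGLE_INOUT_IDX) =
      context.getInput(SINGLE_INOUT_IDX).pow(exponent);

    // preferred: compute into the already-created output tensor
    context.getInput(SINGLE_INOUT_IDX)
      .pow(exponent, context.getOutput(SINGLE_INOUT_IDX));
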
diff --git a/Applications/Custom/pow.cpp b/Applications/Custom/pow.cpp
index df004ec..123d16e 100644
--- a/Applications/Custom/pow.cpp
+++ b/Applications/Custom/pow.cpp
@@ -92,8 +92,8 @@ void PowLayer::forwarding(nntrainer::RunLayerContext &context, bool training) {
 #endif
 
   /// net hidden are used to save var,
-  context.getOutput(SINGLE_INOUT_IDX) =
-    context.getInput(SINGLE_INOUT_IDX).pow(exponent);
+  context.getInput(SINGLE_INOUT_IDX)
+    .pow(exponent, context.getOutput(SINGLE_INOUT_IDX));
 
 #ifdef DEBUG
   std::cout << "input: " << context.getInput(SINGLE_INOUT_IDX);
@@ -112,7 +112,7 @@ void PowLayer::calcDerivative(nntrainer::RunLayerContext &context) {
     context.getIncomingDerivative(SINGLE_INOUT_IDX);
 
   nntrainer::Tensor &dx = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
-  dx = derivative_.multiply(exponent);
+  derivative_.multiply(exponent, dx);
 
 #ifdef DEBUG
   std::cout << "input: " << context.getOutput(SINGLE_INOUT_IDX);
diff --git a/nntrainer/layers/bn_layer.cpp b/nntrainer/layers/bn_layer.cpp
index c442af7..c1a54dd 100644
--- a/nntrainer/layers/bn_layer.cpp
+++ b/nntrainer/layers/bn_layer.cpp
@@ -160,7 +160,7 @@ void BatchNormalizationLayer::forwarding(RunLayerContext &context,
     mu.multiply_i(momentum);
     mu.add_i(t_reduced, 1 - momentum);
 
-    t_full = deviation.pow(2.0f);
+    deviation.pow(2.0f, t_full);
     t_full.average(axes_to_reduce, cvar);
 
     var.multiply_i(momentum);
diff --git a/nntrainer/layers/conv2d_layer.cpp b/nntrainer/layers/conv2d_layer.cpp
index 201df8a..7a8c5a2 100644
--- a/nntrainer/layers/conv2d_layer.cpp
+++ b/nntrainer/layers/conv2d_layer.cpp
@@ -483,7 +483,7 @@ void Conv2DLayer::calcGradient(RunLayerContext &context) {
   }
 
   delK.reshape(filter_dim);
-  delBias = derivative.sum({0, 2, 3});
+  derivative.sum({0, 2, 3}, delBias);
 }
 
 void Conv2DLayer::exportTo(Exporter &exporter,
diff --git a/nntrainer/layers/layer_context.cpp b/nntrainer/layers/layer_context.cpp
index 40c85e9..3526d8c 100644
--- a/nntrainer/layers/layer_context.cpp
+++ b/nntrainer/layers/layer_context.cpp
@@ -10,6 +10,8 @@
  * @brief This is the layer context for each layer
  */
 
+#include
+
 #include
 #include
 #include
@@ -29,6 +31,9 @@ RunLayerContext::RunLayerContext(const std::string &name, bool trainable,
   std::get(props).set(trainable);
   NNTR_THROW_IF(!readyToUse(), std::invalid_argument)
     << "run context is not ready to use upon creation";
+
+  if (!validate())
+    throw std::invalid_argument("Creating invalid run context");
 }
 
 /**
@@ -112,6 +117,16 @@ Tensor &RunLayerContext::getOutputGrad(unsigned int idx) {
 }
 
 /**
+ * @brief check if the output has gradient
+ *
+ * @param idx Identifier of the output
+ * @return true if output has gradient, else false
+ */
+bool RunLayerContext::outputHasGradient(unsigned int idx) const {
+  return outputs[idx]->hasGradient();
+}
+
+/**
  * @brief Get the Output Grad tensor object
  *
  * @param idx Identifier of the output
@@ -162,6 +177,16 @@ Tensor &RunLayerContext::getInputGrad(unsigned int idx) {
 }
 
 /**
+ * @brief check if the input has gradient
+ *
+ * @param idx Identifier of the input
+ * @return true if input has gradient, else false
+ */
+bool RunLayerContext::inputHasGradient(unsigned int idx) const {
+  return inputs[idx]->hasGradient();
+}
+
+/**
  * @brief Get the outgoing Derivative tensor object
  *
  * @param idx Identifier of the input
@@ -308,4 +333,78 @@ bool RunLayerContext::readyToUse() const {
   return !inputs[0]->getVariable().empty();
 }
 
+/**
+ * @brief validates the run context after run
+ *
+ * @return true if valid, else false
+ */
+bool RunLayerContext::validate(bool skip_input, bool skip_label) {
+  /**
+   * @note a common mistake when using run_context is re-assigning the tensor
+   * references which leads to nasty bugs. This validation ensures that the
+   * tensors are not set mistakenly by verifying their unique names
+   */
+#ifdef ENABLE_TEST
+  if (tensor_map.empty() || !tensor_map[inputs[0]->getName()]) {
+    auto filler = [this](const auto &vec) {
+      for (auto const &val : vec) {
+        tensor_map[val->getName()] = val->getVariableRef().getData();
+        tensor_map[val->getGradientName()] = val->getGradientRef().getData();
+      }
+    };
+
+    /** fill the tensor map for the first validation */
+    filler(weights);
+    filler(inputs);
+    filler(outputs);
+    filler(tensors);
+  } else {
+    auto matcher = [this](const Var_Grad *val, bool skip_grad = false) {
+      if (val->getName().empty() ||
+          (val->hasGradient() && val->getGradientName().empty()))
+        return false;
+
+      if (tensor_map.find(val->getName()) == tensor_map.end())
+        /**
+         * Disabled because of in-place input layer. Enable this later.
+         * tensor_map[val->getName()] != val->getVariableRef().getData())
+         */
+        return false;
+
+      if (skip_grad &&
+          (tensor_map.find(val->getGradientName()) == tensor_map.end()))
+        return false;
+
+      return true;
+    };
+
+    auto matcher_w = [this, matcher](const std::vector<Weight *> &vec) {
+      auto ret = true;
+      for (auto const &val : vec)
+        ret &= matcher(val);
+      return ret;
+    };
+
+    auto matcher_vw = [this, matcher](const std::vector<Var_Grad *> &vec,
+                                      bool skip_grad = false) {
+      auto ret = true;
+      for (auto const &val : vec)
+        ret &= matcher(val, skip_grad);
+      return ret;
+    };
+
+    /** match the tensor map from the next validations */
+
+    auto ret = matcher_w(weights) & matcher_vw(tensors) &
+               matcher_vw(outputs, skip_label);
+    if (!skip_input)
+      ret &= matcher_vw(inputs);
+
+    return ret;
+  }
+#endif
+
+  return true;
+}
+
 } // namespace nntrainer
diff --git a/nntrainer/layers/layer_context.h b/nntrainer/layers/layer_context.h
index db27f5d..aeada49 100644
--- a/nntrainer/layers/layer_context.h
+++ b/nntrainer/layers/layer_context.h
@@ -384,6 +384,14 @@ public:
   Tensor &getOutputGradUnsafe(unsigned int idx);
 
   /**
+   * @brief check if the output has gradient
+   *
+   * @param idx Identifier of the output
+   * @return true if output has gradient, else false
+   */
+  bool outputHasGradient(unsigned int idx) const;
+
+  /**
    * @brief Get the incoming Derivative tensor object
    *
    * @param idx Identifier of the output
@@ -416,6 +424,14 @@ public:
   Tensor &getInputGrad(unsigned int idx);
 
   /**
+   * @brief check if the input has gradient
+   *
+   * @param idx Identifier of the input
+   * @return true if input has gradient, else false
+   */
+  bool inputHasGradient(unsigned int idx) const;
+
+  /**
    * @brief Get the outgoing Derivative tensor object
    *
    * @param idx Identifier of the input
@@ -589,6 +605,16 @@ public:
    */
   bool readyToUse() const;
 
+  /**
+   * @brief validates the run context after run
+   *
+   * @param skip_input skip verifying the input
+   * @param skip_label skip verifying the label
+   *
+   * @return true if valid, else false
+   */
+  bool validate(bool skip_input = false, bool skip_label = false);
+
 private:
   std::tuple props; /**< props of the layer */
   float loss;       /**< loss of the layer */
@@ -598,6 +624,11 @@ private:
   std::vector<Var_Grad *> outputs; /**< outputs of the layer */
   std::vector<Var_Grad *> tensors; /**< tensors of the layer */
 
+#ifdef ENABLE_TEST
+  std::map<std::string, const float *>
+    tensor_map; /**< map of tensor name to tensor address */
+#endif
+
   /**
    * @brief Get regularization loss for the weight
    *
diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp
index 6eb9c6d..1a10b70 100644
--- a/nntrainer/layers/layer_node.cpp
+++ b/nntrainer/layers/layer_node.cpp
@@ -470,6 +470,12 @@ void LayerNode::forwarding(bool training) {
   layer->forwarding(*run_context, training);
   END_PROFILE(forward_event_key);
 
+#ifdef ENABLE_TEST
+  if (!run_context->validate(getNumInputConnections() == 0, !requireLabel()))
+    throw std::runtime_error("Running forwarding() layer " + getName() +
+                             " invalidated the context.");
+#endif
+
   /** add loss only for loss layers */
   if (requireLabel())
     loss->set(*loss + run_context->getLoss());
@@ -482,6 +488,12 @@ void LayerNode::calcDerivative() {
   START_PROFILE(calc_deriv_event_key);
   layer->calcDerivative(*run_context);
   END_PROFILE(calc_deriv_event_key);
+
+#ifdef ENABLE_TEST
+  if (!run_context->validate(getNumInputConnections() == 0, !requireLabel()))
+    throw std::runtime_error("Running calcDerivative() layer " + getName() +
+                             " invalidated the context.");
+#endif
 }
 
 /**
@@ -492,6 +504,12 @@ void LayerNode::calcGradient() {
   if (getTrainable())
     layer->calcGradient(*run_context);
   END_PROFILE(calc_grad_event_key);
+
+#ifdef ENABLE_TEST
+  if (!run_context->validate(getNumInputConnections() == 0, !requireLabel()))
+    throw std::runtime_error("Running calcGradient() layer " + getName() +
+                             " invalidated the context.");
+#endif
 }
 
 /**
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp
index 6072034..d69758d 100644
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -680,7 +680,7 @@ Tensor &Tensor::sum(unsigned int axis, Tensor &ret, float alpha) const {
 
   if (dim.getDim()[axis] == 1 and alpha == 1.0) {
     CREATE_IF_EMPTY_DIMS(ret, dim);
-    ret.copy(*this);
+    ret.copy(this->getData());
     return ret;
   }
@@ -1128,6 +1128,7 @@ void Tensor::copy(const Tensor &from) {
 Tensor Tensor::clone() const {
   Tensor t;
   t.copy(*this);
+  t.name = name;
   return t;
 }
 
diff --git a/test/unittest/unittest_nntrainer_models.cpp b/test/unittest/unittest_nntrainer_models.cpp
index 245a0ce..3a0adac 100644
--- a/test/unittest/unittest_nntrainer_models.cpp
+++ b/test/unittest/unittest_nntrainer_models.cpp
@@ -511,6 +511,8 @@ GraphWatcher::prepareData(std::ifstream &f,
   in.read(f);
   lb.read(f);
+  in.setName("input");
+  lb.setName("label");
 
   for (auto &i : nodes) {
     i.readLayerWeight(f);
-- 
2.7.4