From: Jihoon Lee
Date: Tue, 21 Dec 2021 09:02:02 +0000 (+0900)
Subject: [Tensor] OutputGrad defaults to be zero if not given
X-Git-Tag: accepted/tizen/unified/20220323.062643~58
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=76f9b9164c35681b0726369cc48dae7332362e52;p=platform%2Fcore%2Fml%2Fnntrainer.git

[Tensor] OutputGrad defaults to be zero if not given

This patch makes the output grad (the incoming derivative) default to
all zeros when the given output is not trainable, so the given output is
treated as a constant. If a user wants to check whether an output is
constant-like (has a zero gradient as its partner), she can simply check
outputHasGradient.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee
---

diff --git a/nntrainer/layers/layer_context.cpp b/nntrainer/layers/layer_context.cpp
index bcec2f4..e81d567 100644
--- a/nntrainer/layers/layer_context.cpp
+++ b/nntrainer/layers/layer_context.cpp
@@ -122,12 +122,12 @@ const Tensor &RunLayerContext::getOutput(unsigned int idx) const {
  * @brief Get the Output Grad tensor object
  *
  * @param idx Identifier of the output
- * @return Tensor& Reference to the output grad tensor
+ * @return Tensor Read-only output grad tensor
  */
-const Tensor &RunLayerContext::getOutputGrad(unsigned int idx) const {
-  if (!outputs[idx]->hasGradient())
-    throw std::invalid_argument(
-      "Requesting gradient for a non-trainable tensor.");
+const Tensor RunLayerContext::getOutputGrad(unsigned int idx) const {
+  if (!outputs[idx]->hasGradient()) {
+    return Tensor(outputs[idx]->getDim(), true, Tensor::Initializer::ZEROS);
+  }
   return const_cast<RunLayerContext *>(this)->getOutputGradUnsafe(idx);
 }
 
@@ -158,9 +158,9 @@ Tensor &RunLayerContext::getOutputGradUnsafe(unsigned int idx) {
  * @brief Get the incoming Derivative tensor object
  *
  * @param idx Identifier of the output
- * @return Tensor& Reference to the output derivative tensor
+ * @return Tensor tensor to incoming derivative; initialized to zero if the output has no gradient
  */
-const Tensor &RunLayerContext::getIncomingDerivative(unsigned int idx) const {
+const Tensor RunLayerContext::getIncomingDerivative(unsigned int idx) const {
   return getOutputGrad(idx);
 }
 
diff --git a/nntrainer/layers/layer_context.h b/nntrainer/layers/layer_context.h
index 9f20a59..a39969c 100644
--- a/nntrainer/layers/layer_context.h
+++ b/nntrainer/layers/layer_context.h
@@ -283,6 +283,7 @@ private:
   bool in_place;             /**< if the layer is expected to run in-place */
   float clip_by_global_norm; /**< max norm value for clip by norm */
 
+  std::vector output_spec;
   std::vector<Weight::Spec> weights_spec; /**< Specification for the weights */
   std::vector<Var_Grad::Spec>
     tensors_spec; /**< Specification for the var_grad (trainable/non-trainable
@@ -394,15 +395,17 @@ public:
    * @brief Get the Output Grad tensor object
    *
    * @param idx Identifier of the output
-   * @return Tensor& Reference to the output grad tensor
+   * @return Read-only output grad tensor; if the output does not have a
+   * gradient, a temporary tensor initialized to zero is returned
    */
-  const Tensor &getOutputGrad(unsigned int idx) const;
+  const Tensor getOutputGrad(unsigned int idx) const;
 
   /**
    * @brief Get the Output Grad tensor object
    *
    * @param idx Identifier of the output
-   * @return Tensor& Reference to the output grad tensor
+   * @return Tensor& Reference to the output grad tensor; this is valid only if
+   * the given output is trainable
    *
    * @note recommended to NOT use this function as a layer developer but rather
    * use getOutputGrad().
@@ -421,9 +424,10 @@ public:
    * @brief Get the incoming Derivative tensor object
    *
    * @param idx Identifier of the output
-   * @return Tensor& Reference to the output derivative tensor
+   * @return Tensor output derivative tensor; if the output does not have a
+   * gradient, a temporary tensor initialized to zero is returned
    */
-  const Tensor &getIncomingDerivative(unsigned int idx) const;
+  const Tensor getIncomingDerivative(unsigned int idx) const;
 
   /**
    * @brief Get the Input tensor object
diff --git a/nntrainer/layers/layer_node.h b/nntrainer/layers/layer_node.h
index 72b8bc8..5538a88 100644
--- a/nntrainer/layers/layer_node.h
+++ b/nntrainer/layers/layer_node.h
@@ -553,7 +553,7 @@ public:
    * @param idx Identifier of the output
    * @return Tensor& Reference to the output grad tensor
    */
-  const Tensor &getOutputGrad(unsigned int idx) const {
+  const Tensor getOutputGrad(unsigned int idx) const {
     NNTR_THROW_IF(!run_context, std::runtime_error)
       << __func__ << " layer needs to be finalized first!";
     return run_context->getOutputGrad(idx);
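
For illustration, below is a minimal sketch (not part of the patch) of how a
layer-side backward pass could rely on the new default. The function name
calcDerivativeSketch, the SINGLE_INOUT_IDX constant, and the identity backward
logic are hypothetical stand-ins; getIncomingDerivative and
getOutgoingDerivative are the RunLayerContext calls touched or relied on by
this change.

#include <layer_context.h>
#include <tensor.h>

using nntrainer::RunLayerContext;
using nntrainer::Tensor;

static constexpr unsigned int SINGLE_INOUT_IDX = 0;

// Hypothetical identity backward pass for a single-input, single-output
// layer, written against the API after this patch.
void calcDerivativeSketch(RunLayerContext &context) {
  // Returned by value now: for a non-trainable output this no longer throws,
  // but yields a temporary of the same dimension initialized to zero, so the
  // output is treated as a constant during backpropagation.
  const Tensor deriv = context.getIncomingDerivative(SINGLE_INOUT_IDX);

  // Identity backward: forward the incoming derivative to the input side.
  Tensor &outgoing = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
  outgoing.copy(deriv);
}

Returning by value trades a copy for safety: a caller can no longer bind a
reference to a gradient tensor that does not exist, and the outputHasGradient
check mentioned in the message above distinguishes a real incoming derivative
from the zero placeholder.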