* @brief Get the Output Grad tensor object
*
* @param idx Identifier of the output
- * @return Tensor& Reference to the output grad tensor
+ * @return Tensor Read-only output grad tensor
*/
-const Tensor &RunLayerContext::getOutputGrad(unsigned int idx) const {
- if (!outputs[idx]->hasGradient())
- throw std::invalid_argument(
- "Requesting gradient for a non-trainable tensor.");
+const Tensor RunLayerContext::getOutputGrad(unsigned int idx) const {
+ if (!outputs[idx]->hasGradient()) {
+ return Tensor(outputs[idx]->getDim(), true, Tensor::Initializer::ZEROS);
+ }
return const_cast<RunLayerContext *>(this)->getOutputGradUnsafe(idx);
}
* @brief Get the incoming Derivative tensor object
*
* @param idx Identifier of the output
- * @return Tensor& Reference to the output derivative tensor
+ * @return Tensor incoming derivative tensor. If the derivative does not
+ * have a gradient, a temporary tensor initialized to zero is returned
*/
-const Tensor &RunLayerContext::getIncomingDerivative(unsigned int idx) const {
+const Tensor RunLayerContext::getIncomingDerivative(unsigned int idx) const {
return getOutputGrad(idx);
}
bool in_place; /**< if the layer is expected to run in-place */
float clip_by_global_norm; /**< max norm value for clip by norm */
+ std::vector<TensorSpecV2> output_spec;
std::vector<WeightSpec> weights_spec; /**< Specification for the weights */
std::vector<TensorSpec>
tensors_spec; /**< Specification for the var_grad (trainable/non-trainable
* @brief Get the Output Grad tensor object
*
* @param idx Identifier of the output
- * @return Tensor& Reference to the output grad tensor
+ * @return Read-only output grad tensor. If the output does not have a
+ * gradient, a temporary tensor initialized to zero is returned
*/
- const Tensor &getOutputGrad(unsigned int idx) const;
+ const Tensor getOutputGrad(unsigned int idx) const;
/**
* @brief Get the Output Grad tensor object
*
* @param idx Identifier of the output
- * @return Tensor& Reference to the output grad tensor
+ * @return Tensor& Reference to the output grad tensor, this is valid only if
+ * the given output is trainable
*
* @note recommended to NOT use this function as a layer developer but rather
* use getOutputGrad().
* @brief Get the incoming Derivative tensor object
*
* @param idx Identifier of the output
- * @return Tensor& Reference to the output derivative tensor
+ * @return Tensor output derivative tensor. If the derivative does not
+ * have a gradient, a temporary tensor initialized to zero is returned
*/
- const Tensor &getIncomingDerivative(unsigned int idx) const;
+ const Tensor getIncomingDerivative(unsigned int idx) const;
/**
* @brief Get the Input tensor object
* @param idx Identifier of the output
* @return Tensor& Reference to the output grad tensor
*/
- const Tensor &getOutputGrad(unsigned int idx) const {
+ const Tensor getOutputGrad(unsigned int idx) const {
NNTR_THROW_IF(!run_context, std::runtime_error)
<< __func__ << " layer needs to be finalized first!";
return run_context->getOutputGrad(idx);