[fclayer] Update gradient to accumulate
author Jihoon Lee <jhoon.it.lee@samsung.com>
Tue, 5 Oct 2021 11:08:12 +0000 (20:08 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Thu, 7 Oct 2021 10:20:47 +0000 (19:20 +0900)
This patch updates the gradient calculation of the fully connected layer
to accumulate: instead of overwriting the weight and bias gradients on
every backward pass, the newly computed contribution is added to the
values already stored.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee <jhoon.it.lee@samsung.com>
nntrainer/layers/fc_layer.cpp
test/unittest/layers/layers_golden_tests.cpp

nntrainer/layers/fc_layer.cpp
index add44d0..59a69a2 100644
@@ -103,7 +103,7 @@ void FullyConnectedLayer::calcDerivative(RunLayerContext &context) {
   Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
   Tensor &ret_ = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
 
-  ret_ = derivative_.dot(weight, ret_, false, true);
+  derivative_.dot(weight, ret_, false, true);
 }
 
 void FullyConnectedLayer::calcGradient(RunLayerContext &context) {
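
The first hunk drops a redundant self-assignment: Tensor::dot writes its
product into the tensor passed as the result argument and returns a
reference to that same tensor, so `ret_ = derivative_.dot(weight, ret_, ...)`
assigned ret_ to itself. A minimal sketch of this output-parameter pattern
(hypothetical Mat type, not the real nntrainer Tensor class):

struct Mat {
  // Fill `result` in place and return a reference to it, mirroring the
  // shape of nntrainer's Tensor::dot(m, result, trans, trans_m).
  Mat &dot(const Mat &other, Mat &result, bool trans = false,
           bool trans_other = false) const {
    // ... compute the (optionally transposed) product into result ...
    return result;
  }
};

void backward(const Mat &derivative, const Mat &weight, Mat &ret) {
  derivative.dot(weight, ret, false, true); // ret now holds dY . W^T
  // ret = derivative.dot(weight, ret, false, true); // old form: self-assignment
}
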
@@ -113,8 +113,10 @@ void FullyConnectedLayer::calcGradient(RunLayerContext &context) {
   Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
   Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
 
-  derivative_.sum({0, 1, 2}, djdb);
-  input_.dot(derivative_, djdw, true, false);
+  /// @todo optimize below by adding beta to Tensor::sum
+  Tensor t = derivative_.sum({0, 1, 2});
+  djdb.add_i(t);
+  input_.dot(derivative_, djdw, true, false, 1.0f);
 }
 
 } /* namespace nntrainer */
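
The second hunk is the accumulation itself: the bias gradient is updated in
place with add_i (dJ/db += sum(dY)), and the trailing 1.0f passed to dot acts
as a GEMM-style beta, so dJ/dW = X^T . dY + 1.0 * dJ/dW rather than a plain
overwrite. A self-contained sketch of that beta semantic (plain C++ loops,
illustrative only, not the nntrainer Tensor API):

#include <cstddef>
#include <vector>

// C = A^T * B + beta * C, with A (k x m), B (k x n), C (m x n) in row-major
// order, mirroring input_.dot(derivative_, djdw, true, false, beta).
void gemm_at_b(const std::vector<float> &A, const std::vector<float> &B,
               std::vector<float> &C, std::size_t k, std::size_t m,
               std::size_t n, float beta) {
  for (std::size_t i = 0; i < m; ++i)
    for (std::size_t j = 0; j < n; ++j) {
      float acc = 0.0f;
      for (std::size_t p = 0; p < k; ++p)
        acc += A[p * m + i] * B[p * n + j];
      C[i * n + j] = beta * C[i * n + j] + acc; // beta = 1.0f accumulates
    }
}

With beta = 0 this reproduces the old overwrite behaviour; with beta = 1
repeated backward passes add up, which is what the patch relies on. The
@todo remains because Tensor::sum takes no beta yet, hence the temporary
tensor plus add_i on the bias path.
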
test/unittest/layers/layers_golden_tests.cpp
index 26a6f99..a69c2d0 100644
@@ -103,6 +103,7 @@ static TensorPacks prepareTensors(const InitLayerContext &context,
       weights.emplace_back(spec, true);
       sizeCheckedReadTensor(weights.back().getVariableRef(), file,
                             weights.back().getName());
+      weights.back().getGradientRef().setZero();
     }
     return weights;
   };
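
Accumulation only works if the gradient starts from a defined value, so the
golden-test harness zeroes each gradient tensor right after loading the
weight; otherwise the first add_i / beta = 1.0f dot would fold whatever was
in the uninitialized buffer into the result. The general pattern
(illustrative, not the test-harness API):

#include <cstddef>
#include <vector>

struct WeightPack {
  std::vector<float> variable;
  std::vector<float> gradient;
};

// Accumulating update: correct only if `gradient` was zeroed beforehand.
void accumulate_grad(WeightPack &w, const std::vector<float> &contribution) {
  for (std::size_t i = 0; i < w.gradient.size(); ++i)
    w.gradient[i] += contribution[i];
}

void prepare(WeightPack &w, std::size_t n) {
  w.variable.assign(n, 0.0f); // stands in for sizeCheckedReadTensor(...)
  w.gradient.assign(n, 0.0f); // analogous to getGradientRef().setZero()
}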