[bn layer] Support non-trainable mode
author     Parichay Kapoor <pk.kapoor@samsung.com>
           Thu, 5 Nov 2020 04:55:13 +0000 (13:55 +0900)
committer  Jijoong Moon <jijoong.moon@samsung.com>
           Mon, 9 Nov 2020 00:42:38 +0000 (09:42 +0900)
Support non-trainable mode for the bn layer, in which the parameters
of this layer are not updated.
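
For reference, the parameter gradients that are now gated behind the
trainable check are the standard batch-normalization gradients; they
correspond to dbeta and dgamma in the diff below:

  d\beta  = \sum_i \frac{\partial L}{\partial y_i}, \qquad
  d\gamma = \sum_i \frac{\partial L}{\partial y_i}\,\hat{x}_i, \quad
  \text{where } \hat{x}_i = (x_i - \mu)\,\text{invstd} = \frac{x_i - \mu}{\sqrt{\sigma^2 + \epsilon}}

The input gradient dx is still computed and returned unconditionally so
that layers preceding a frozen bn layer keep receiving gradients.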

**Self evaluation:**
1. Build test: [x]Passed [ ]Failed [ ]Skipped
2. Run test: [x]Passed [ ]Failed [ ]Skipped

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
nntrainer/layers/bn_layer.cpp

index 826f08b..08207cd 100644
@@ -126,7 +126,7 @@ sharedConstTensors BatchNormalizationLayer::forwarding(sharedConstTensors in) {
   Tensor &beta = weightAt(static_cast<int>(BNParams::beta)).getVariableRef();
 
   input = *in[0];
-  /// @todo change trainable #524
+  /// @todo change trainable to train/eval mode #524
   if (trainable) {
     Tensor cmu = input.average(axes_to_reduce);
     deviation = input.subtract(cmu);
@@ -165,14 +165,10 @@ BatchNormalizationLayer::backwarding(sharedConstTensors derivative,
   Tensor deriv = *derivative[0];
 
   int N = 1;
-
   for (auto &axis : axes_to_reduce) {
     N *= input_dim[0].getTensorDim(axis);
   }
 
-  dbeta = deriv.sum(axes_to_reduce);
-  dgamma = deviation.multiply(invstd).multiply(deriv).sum(axes_to_reduce);
-
   Tensor dx_1 = gamma.multiply(invstd);
   Tensor dx_2 = deriv.multiply(N);
   dx_2.subtract_i(deriv.sum(axes_to_reduce));
@@ -182,7 +178,11 @@ BatchNormalizationLayer::backwarding(sharedConstTensors derivative,
   Tensor dx = dx_2.multiply(dx_1);
   dx.divide_i(N);
 
-  opt->apply_gradients(weight_list, num_weights, iteration);
+  if (trainable) {
+    dbeta = deriv.sum(axes_to_reduce);
+    dgamma = deviation.multiply(invstd).multiply(deriv).sum(axes_to_reduce);
+    opt->apply_gradients(weight_list, num_weights, iteration);
+  }
 
   return {MAKE_SHARED_TENSOR(std::move(dx))};
 }
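
For illustration only (not part of the commit), below is a minimal
self-contained sketch of the same pattern: the input gradient is always
propagated so preceding layers can still learn, while the parameter
gradients and the optimizer step are skipped when the layer is frozen.
ToyAffine and its members are hypothetical names, not nntrainer APIs.

// Toy scalar "layer" y = gamma * x + beta, illustrating a trainable guard.
#include <iostream>

struct ToyAffine {
  float gamma = 1.0f;
  float beta = 0.0f;
  bool trainable = true;

  float forward(float x) const { return gamma * x + beta; }

  // Returns dx; updates gamma/beta in place only when trainable.
  float backward(float x, float dy, float lr) {
    float dx = dy * gamma;      // always propagated to the previous layer
    if (trainable) {
      float dgamma = dy * x;    // parameter gradients are computed ...
      float dbeta = dy;
      gamma -= lr * dgamma;     // ... and applied only in trainable mode
      beta -= lr * dbeta;
    }
    return dx;
  }
};

int main() {
  ToyAffine layer;
  layer.trainable = false;      // freeze this layer's parameters
  float dx = layer.backward(/*x=*/2.0f, /*dy=*/1.0f, /*lr=*/0.1f);
  std::cout << "dx=" << dx << " gamma=" << layer.gamma << '\n'; // gamma unchanged
  return 0;
}

Keeping the dx computation outside the guard is what allows a
non-trainable bn layer to sit in the middle of an otherwise trainable
network without blocking backpropagation.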