[layers] Make requested weight/tensor names unique
author Parichay Kapoor <pk.kapoor@samsung.com>
Wed, 4 Aug 2021 10:37:27 +0000 (19:37 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Fri, 20 Aug 2021 08:15:58 +0000 (17:15 +0900)
Update the layers to make the requested weight and tensor names unique.
This is done by prefixing each weight/tensor name with the layer name;
since layer names are unique, the combined name is unique as well.

InitLayerContext is updated to provide access to the layer name while
finalizing the layer.
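
For example (an illustrative sketch, not part of this patch; the call
mirrors the fc_layer.cpp hunk below, and "my_fc" stands for a hypothetical
layer name), a layer's finalize() now requests its weight as:

  weight_idx[FCParams::weight] = context.requestWeight(
    weight_dim, weight_initializer, weight_regularizer,
    weight_regularizer_constant, context.getName() + ":weight", true);

Since layer names are unique across the model, the resulting "my_fc:weight"
cannot collide with a weight or tensor requested by any other layer.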

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
13 files changed:
Applications/SimpleShot/layers/centroid_knn.cpp
nntrainer/layers/bn_layer.cpp
nntrainer/layers/conv2d_layer.cpp
nntrainer/layers/dropout.cpp
nntrainer/layers/embedding.cpp
nntrainer/layers/fc_layer.cpp
nntrainer/layers/gru.cpp
nntrainer/layers/layer_context.h
nntrainer/layers/layer_node.cpp
nntrainer/layers/lstm.cpp
nntrainer/layers/pooling2d_layer.cpp
nntrainer/layers/rnn.cpp
test/unittest/layers/layers_standalone_common_tests.cpp

index 4951541bc2ee58606eb9c5129a5827cb0364078c..fa6db7a53a59d77cfa96cef347d671447f8cfdfa 100644 (file)
@@ -71,13 +71,15 @@ void CentroidKNN::finalize(nntrainer::InitLayerContext &context) {
   /// samples seen for the current run to calculate the centroid
   auto samples_seen = nntrainer::TensorDim({num_class});
 
-  weight_idx[KNNParams::map] = context.requestWeight(
-    map_dim, nntrainer::Tensor::Initializer::ZEROS,
-    nntrainer::WeightRegularizer::NONE, 1.0f, "centroidNN:map", false);
-
-  weight_idx[KNNParams::num_samples] = context.requestWeight(
-    samples_seen, nntrainer::Tensor::Initializer::ZEROS,
-    nntrainer::WeightRegularizer::NONE, 1.0f, "centroidNN:num_samples", false);
+  weight_idx[KNNParams::map] =
+    context.requestWeight(map_dim, nntrainer::Tensor::Initializer::ZEROS,
+                          nntrainer::WeightRegularizer::NONE, 1.0f,
+                          context.getName() + ":map", false);
+
+  weight_idx[KNNParams::num_samples] =
+    context.requestWeight(samples_seen, nntrainer::Tensor::Initializer::ZEROS,
+                          nntrainer::WeightRegularizer::NONE, 1.0f,
+                          context.getName() + ":num_samples", false);
 }
 
 void CentroidKNN::forwarding(nntrainer::RunLayerContext &context,
index fd08f5b32e5b1f1e4c8c2903efe6aedd42113ef0..91e10a0db5eea592bb8db4bedb442bcac21e82b3 100644 (file)
@@ -60,22 +60,22 @@ void BatchNormalizationLayer::finalize(InitLayerContext &context) {
       axes_to_reduce.push_back(i);
   }
 
-  wt_idx[BNParams::mu] = context.requestWeight(dim, initializers[BNParams::mu],
-                                               WeightRegularizer::NONE, 1.0f,
-                                               "BN::moving_mean", false);
+  wt_idx[BNParams::mu] = context.requestWeight(
+    dim, initializers[BNParams::mu], WeightRegularizer::NONE, 1.0f,
+    context.getName() + ":moving_mean", false);
   wt_idx[BNParams::var] = context.requestWeight(
     dim, initializers[BNParams::var], WeightRegularizer::NONE, 1.0f,
     "BN::moving_variance", false);
-  wt_idx[BNParams::gamma] =
-    context.requestWeight(dim, initializers[BNParams::gamma],
-                          WeightRegularizer::NONE, 1.0f, "BN::gamma", true);
-  wt_idx[BNParams::beta] =
-    context.requestWeight(dim, initializers[BNParams::beta],
-                          WeightRegularizer::NONE, 1.0f, "BN::beta", true);
+  wt_idx[BNParams::gamma] = context.requestWeight(
+    dim, initializers[BNParams::gamma], WeightRegularizer::NONE, 1.0f,
+    context.getName() + ":gamma", true);
+  wt_idx[BNParams::beta] = context.requestWeight(
+    dim, initializers[BNParams::beta], WeightRegularizer::NONE, 1.0f,
+    context.getName() + ":beta", true);
 
   wt_idx[BNParams::deviation] =
-    context.requestTensor(in_dim, "BN::deviation", Tensor::Initializer::NONE,
-                          false, ITERATION_LIFESPAN);
+    context.requestTensor(in_dim, context.getName() + ":deviation",
+                          Tensor::Initializer::NONE, false, ITERATION_LIFESPAN);
 }
 
 void BatchNormalizationLayer::setProperty(
index 65e3f0b2431f4ddf1fd543d9f74dd8e43b17fd52..480e491f46e00ef578093c71036ffdfc9b6c7c46 100644 (file)
@@ -288,12 +288,12 @@ void Conv2DLayer::finalize(InitLayerContext &context) {
 
   padding = std::get<props::Padding2D>(conv_props).compute(in_dim, dim);
 
-  wt_idx[ConvParams::weight] =
-    context.requestWeight(dim, weight_initializer, weight_regularizer,
-                          weight_regularizer_constant, "Conv2d:filter", true);
+  wt_idx[ConvParams::weight] = context.requestWeight(
+    dim, weight_initializer, weight_regularizer, weight_regularizer_constant,
+    context.getName() + ":filter", true);
   wt_idx[ConvParams::bias] =
     context.requestWeight(bias_dim, bias_initializer, WeightRegularizer::NONE,
-                          1.0f, "Conv2d:bias", true);
+                          1.0f, context.getName() + ":bias", true);
 
   /// we don't have same padding for now but later, same padding don't apply
   /// when kernel size is even in current implementation (we need to handle
@@ -325,10 +325,11 @@ void Conv2DLayer::finalize(InitLayerContext &context) {
   }
 
   wt_idx[ConvParams::im2col_result] = context.requestTensor(
-    calcIm2ColOutputDim(in_dim, dim, padding, stride, {1, 1}), "Conv2d:im2col",
-    Tensor::Initializer::NONE, false, ITERATION_LIFESPAN);
+    calcIm2ColOutputDim(in_dim, dim, padding, stride, {1, 1}),
+    context.getName() + ":im2col", Tensor::Initializer::NONE, false,
+    ITERATION_LIFESPAN);
   wt_idx[ConvParams::col2im_result] = context.requestTensor(
-    calcCol2ImOutputDim(out_dim, dim), "Conv2d:col2im",
+    calcCol2ImOutputDim(out_dim, dim), context.getName() + ":col2im",
     Tensor::Initializer::NONE, false, BACKWARD_FUNC_LIFESPAN);
 }
 
index a7f55a70ea70e59807fb22bdca61368736ccc66e..02dba0bb0e736bc8f0befd98bd4f827a10d884c8 100644 (file)
@@ -27,8 +27,9 @@ void DropOutLayer::finalize(InitLayerContext &context) {
 
   mask_idx.reserve(input_dims.size());
   for (auto &t : input_dims) {
-    mask_idx.push_back(context.requestTensor(
-      t, "DropoutMask", Tensor::Initializer::NONE, false, ITERATION_LIFESPAN));
+    mask_idx.push_back(context.requestTensor(t, context.getName() + ":Mask",
+                                             Tensor::Initializer::NONE, false,
+                                             ITERATION_LIFESPAN));
   }
 }
 
index cf05ea25425c5ab51c05ba42945480934d4fc29b..0a3c5f5dc62e0a694dec30468c9f89fe9c74fea1 100644 (file)
@@ -46,9 +46,9 @@ void EmbeddingLayer::finalize(InitLayerContext &context) {
   dim.width(out_dim);
   dim.batch(1);
 
-  weight_idx =
-    context.requestWeight(dim, weight_initializer, weight_regularizer,
-                          weight_regularizer_constant, "Embedding", true);
+  weight_idx = context.requestWeight(
+    dim, weight_initializer, weight_regularizer, weight_regularizer_constant,
+    context.getName() + ":Embedding", true);
 }
 
 void EmbeddingLayer::setProperty(const std::vector<std::string> &values) {
index 23b726a67f8b2d1464809477947149af591cf43e..58102fbdf3ce394728cc2dbb5ea49a41f652371f 100644 (file)
@@ -59,12 +59,13 @@ void FullyConnectedLayer::finalize(InitLayerContext &context) {
   TensorDim bias_dim(1, 1, 1, unit, 0b0001);
   TensorDim weight_dim(1, 1, in_dim.width(), unit, 0b0011);
 
-  weight_idx[FCParams::weight] =
-    context.requestWeight(weight_dim, weight_initializer, weight_regularizer,
-                          weight_regularizer_constant, "FC:weight", true);
+  weight_idx[FCParams::weight] = context.requestWeight(
+    weight_dim, weight_initializer, weight_regularizer,
+    weight_regularizer_constant, context.getName() + ":weight", true);
 
-  weight_idx[FCParams::bias] = context.requestWeight(
-    bias_dim, bias_initializer, WeightRegularizer::NONE, 1.0f, "FC:bias", true);
+  weight_idx[FCParams::bias] =
+    context.requestWeight(bias_dim, bias_initializer, WeightRegularizer::NONE,
+                          1.0f, context.getName() + ":bias", true);
 }
 
 void FullyConnectedLayer::exportTo(Exporter &exporter,
index 7b60e61611fe7f89a4a0352f8af2f35be8ff3241..07e9481ed4908821a1069fda43eb5d2510199125 100644 (file)
@@ -103,32 +103,34 @@ void GRULayer::finalize(InitLayerContext &context) {
   // weight_initializer can be set sepeartely. weight_xh initializer,
   // weight_hh initializer kernel initializer & recurrent_initializer in keras
   // for now, it is set same way.
-  wt_idx[GRUParams::weight_xh] =
-    context.requestWeight(dim_xh, weight_initializer, weight_regularizer,
-                          weight_regularizer_constant, "GRU:weight_xh", true);
-  wt_idx[GRUParams::weight_hh] =
-    context.requestWeight(dim_hh, weight_initializer, weight_regularizer,
-                          weight_regularizer_constant, "GRU:weight_hh", true);
+  wt_idx[GRUParams::weight_xh] = context.requestWeight(
+    dim_xh, weight_initializer, weight_regularizer, weight_regularizer_constant,
+    context.getName() + ":weight_xh", true);
+  wt_idx[GRUParams::weight_hh] = context.requestWeight(
+    dim_hh, weight_initializer, weight_regularizer, weight_regularizer_constant,
+    context.getName() + ":weight_hh", true);
   wt_idx[GRUParams::bias_h] =
     context.requestWeight(bias_dim, bias_initializer, WeightRegularizer::NONE,
-                          1.0f, "GRU:bias_h", true);
+                          1.0f, context.getName() + ":bias_h", true);
 
   TensorDim d = input_dim;
   d.width(unit);
 
-  wt_idx[GRUParams::hidden_state] = context.requestTensor(
-    d, "GRU:hidden_state", Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
+  wt_idx[GRUParams::hidden_state] =
+    context.requestTensor(d, context.getName() + ":hidden_state",
+                          Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
 
   d.width(unit * NUM_GATE);
-  wt_idx[GRUParams::zrg] = context.requestTensor(
-    d, "GRU:zrg", Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
+  wt_idx[GRUParams::zrg] =
+    context.requestTensor(d, context.getName() + ":zrg",
+                          Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
 
   TensorDim h_dim = TensorDim();
   h_dim.setTensorDim(3, unit);
   h_dim.batch(input_dim.batch());
-  wt_idx[GRUParams::h_prev] =
-    context.requestTensor(h_dim, "GRU:h_prev", Tensor::Initializer::NONE, false,
-                          FORWARD_FUNC_LIFESPAN);
+  wt_idx[GRUParams::h_prev] = context.requestTensor(
+    h_dim, context.getName() + ":h_prev", Tensor::Initializer::NONE, false,
+    FORWARD_FUNC_LIFESPAN);
 
   if (hidden_state_activation_type == ActivationType::ACT_NONE) {
     hidden_state_activation_type = ActivationType::ACT_TANH;
index 5c3d3c4afa97eda088a1a73cdf967b89a4969b3e..74f48aa0e7c93f633b3862fc9793c409cd8bac23 100644 (file)
@@ -67,9 +67,13 @@ public:
    *
    * @param dim Input dimensions for the layer
    */
-  InitLayerContext(const std::vector<TensorDim> &dim, unsigned int num_out) :
+  InitLayerContext(const std::vector<TensorDim> &dim, unsigned int num_out,
+                   const std::string &n = "") :
     input_dim(dim),
-    num_outputs(num_out) {}
+    num_outputs(num_out),
+    name(n) {}
+
+  const std::string &getName() const { return name; }
 
   /**
    * @brief Get the number of inputs for the layer
@@ -279,6 +283,9 @@ public:
       }
     }
 
+    if (name.empty())
+      return false;
+
     return true;
   }
 
@@ -292,6 +299,7 @@ private:
                      variables) */
 
   unsigned int num_outputs; /**< number of outputs for the layer */
+  std::string name;         /**< name of the layer */
 };
 
 /**
index a700d683065b16c89b99f880c311df9335ba9653..edd3d492af811a32f1f898025d0ceab981de078d 100644 (file)
@@ -351,6 +351,8 @@ void LayerNode::finalize() {
   if (finalized)
     throw std::runtime_error("Finalizing a layer which is already finalized");
 
+  init_context = InitLayerContext(init_context.getInputDimensions(),
+                                  init_context.getNumOutputs(), getName());
   if (!init_context.validate())
     throw std::invalid_argument(
       "Invalid init context for finalizing the layer");
index a5eb0889221dc1e5593ffaa4265dd058e2340e27..0eb455b5591d81be964f6114ee6abfdd79933936 100644 (file)
@@ -62,8 +62,8 @@ void LSTMLayer::finalize(InitLayerContext &context) {
 
   if (dropout_rate > epsilon) {
     wt_idx[LSTMParams::dropout_mask] = context.requestTensor(
-      output_dim, "LSTM:dropout_mask", Tensor::Initializer::NONE, false,
-      ITERATION_LIFESPAN);
+      output_dim, context.getName() + ":dropout_mask",
+      Tensor::Initializer::NONE, false, ITERATION_LIFESPAN);
   }
 
   if (!return_sequences) {
@@ -88,28 +88,30 @@ void LSTMLayer::finalize(InitLayerContext &context) {
   // weight_initializer can be set sepeartely. weight_xh initializer,
   // weight_hh initializer kernel initializer & recurrent_initializer in keras
   // for now, it is set same way.
-  wt_idx[LSTMParams::weight_xh] =
-    context.requestWeight(dim_xh, weight_initializer, weight_regularizer,
-                          weight_regularizer_constant, "LSTM:weight_xh", true);
-  wt_idx[LSTMParams::weight_hh] =
-    context.requestWeight(dim_hh, weight_initializer, weight_regularizer,
-                          weight_regularizer_constant, "LSTM:weight_hh", true);
+  wt_idx[LSTMParams::weight_xh] = context.requestWeight(
+    dim_xh, weight_initializer, weight_regularizer, weight_regularizer_constant,
+    context.getName() + ":weight_xh", true);
+  wt_idx[LSTMParams::weight_hh] = context.requestWeight(
+    dim_hh, weight_initializer, weight_regularizer, weight_regularizer_constant,
+    context.getName() + ":weight_hh", true);
   wt_idx[LSTMParams::bias_h] =
     context.requestWeight(bias_dim, bias_initializer, WeightRegularizer::NONE,
-                          1.0f, "LSTM:bias_h", true);
+                          1.0f, context.getName() + ":bias_h", true);
 
   TensorDim d = input_dim;
   d.width(unit);
 
   wt_idx[LSTMParams::hidden_state] =
-    context.requestTensor(d, "LSTM:hidden_state", Tensor::Initializer::NONE,
-                          true, ITERATION_LIFESPAN);
-  wt_idx[LSTMParams::mem_cell] = context.requestTensor(
-    d, "LSTM:mem_cell", Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
+    context.requestTensor(d, context.getName() + ":hidden_state",
+                          Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
+  wt_idx[LSTMParams::mem_cell] =
+    context.requestTensor(d, context.getName() + ":mem_cell",
+                          Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
 
   d.width(unit * NUM_GATE);
-  wt_idx[LSTMParams::fgio] = context.requestTensor(
-    d, "LSTM:fgio", Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
+  wt_idx[LSTMParams::fgio] =
+    context.requestTensor(d, context.getName() + ":fgio",
+                          Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
 
   if (hidden_state_activation_type == ActivationType::ACT_NONE) {
     hidden_state_activation_type = ActivationType::ACT_TANH;
index 777b5813a6e7e443089f7f8d4233818d438a49d9..494f7b9f2966d061175f511273cd98180c3a5294 100644 (file)
@@ -98,14 +98,14 @@ void Pooling2DLayer::finalize(InitLayerContext &context) {
    * // clang-format on
    */
   if (pooling_type == PoolingType::global_max) {
-    pool_helper_idx = context.requestTensor(in_dim, "Pooling2d::helper_idx",
-                                            Tensor::Initializer::NONE, false,
-                                            ITERATION_LIFESPAN);
+    pool_helper_idx = context.requestTensor(
+      in_dim, context.getName() + ":helper_idx", Tensor::Initializer::NONE,
+      false, ITERATION_LIFESPAN);
     pool_helper_size.resize(in_dim.batch() * in_dim.channel());
   } else {
-    pool_helper_idx = context.requestTensor(out_dim, "Pooling2d::helper_idx",
-                                            Tensor::Initializer::NONE, false,
-                                            ITERATION_LIFESPAN);
+    pool_helper_idx = context.requestTensor(
+      out_dim, context.getName() + ":helper_idx", Tensor::Initializer::NONE,
+      false, ITERATION_LIFESPAN);
   }
 }
 
index 1785f0c6afc78b4f11c19ff94daba8f051089cc3..5c204da8aa76d2385505ee11109894db806b4c64 100644 (file)
@@ -49,8 +49,8 @@ void RNNLayer::finalize(InitLayerContext &context) {
 
   if (dropout_rate > epsilon) {
     wt_idx[RNNParams::dropout_mask] = context.requestTensor(
-      output_dim, "RNN:dropout_mask", Tensor::Initializer::NONE, false,
-      ITERATION_LIFESPAN);
+      output_dim, context.getName() + ":dropout_mask",
+      Tensor::Initializer::NONE, false, ITERATION_LIFESPAN);
   }
 
   if (!return_sequences) {
@@ -74,23 +74,24 @@ void RNNLayer::finalize(InitLayerContext &context) {
   // weight_hh initializer kernel initializer & recurrent_initializer in keras
   // for now, it is set same way.
 
-  wt_idx[RNNParams::weight_xh] =
-    context.requestWeight(dim_xh, weight_initializer, weight_regularizer,
-                          weight_regularizer_constant, "RNN:weight_xh", true);
-  wt_idx[RNNParams::weight_hh] =
-    context.requestWeight(dim_hh, weight_initializer, weight_regularizer,
-                          weight_regularizer_constant, "RNN:weight_hh", true);
+  wt_idx[RNNParams::weight_xh] = context.requestWeight(
+    dim_xh, weight_initializer, weight_regularizer, weight_regularizer_constant,
+    context.getName() + ":weight_xh", true);
+  wt_idx[RNNParams::weight_hh] = context.requestWeight(
+    dim_hh, weight_initializer, weight_regularizer, weight_regularizer_constant,
+    context.getName() + ":weight_hh", true);
   wt_idx[RNNParams::bias_h] =
     context.requestWeight(bias_dim, bias_initializer, WeightRegularizer::NONE,
-                          1.0f, "RNN:bias_h", true);
+                          1.0f, context.getName() + ":bias_h", true);
 
   // We do not need this if we reuse net_hidden[0]. But if we do, then the unit
   // test will fail. Becuase it modifies the date during gradient calculation
   // TODO : We could control with something like #define test to save memory
   TensorDim d = input_dim;
   d.width(unit);
-  wt_idx[RNNParams::hidden_state] = context.requestTensor(
-    d, "RNN:hidden_state", Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
+  wt_idx[RNNParams::hidden_state] =
+    context.requestTensor(d, context.getName() + ":hidden_state",
+                          Tensor::Initializer::NONE, true, ITERATION_LIFESPAN);
 
   if (hidden_state_activation_type == ActivationType::ACT_NONE) {
     hidden_state_activation_type = ActivationType::ACT_TANH;
index a1bd1aad9b617b10219cf1ba76f711c2c98bfb68..0d8c42dc809a5a619869d368f46c975af58e1290 100644 (file)
@@ -44,7 +44,7 @@ TEST_P(LayerSemantics, setPropertiesValidInvalidOnly_n) {}
 TEST_P(LayerSemantics, finalizeValidate_p) {
   nntrainer::TensorDim in_dim({1, 1, 1, 1});
   nntrainer::InitLayerContext init_context =
-    nntrainer::InitLayerContext({in_dim}, 1);
+    nntrainer::InitLayerContext({in_dim}, 1, "layer");
   EXPECT_EQ(init_context.validate(), true);
 
   // set necessary properties only
@@ -84,7 +84,7 @@ TEST_P(LayerSemantics, gettersValidate_p) {
 TEST_P(LayerSemantics, setBatchValidate_p) {
   nntrainer::TensorDim in_dim({1, 1, 1, 1});
   nntrainer::InitLayerContext init_context =
-    nntrainer::InitLayerContext({in_dim}, 1);
+    nntrainer::InitLayerContext({in_dim}, 1, "layer");
   init_context.validate();
 
   // set necessary properties only