[context] Receive out conn info and use
author    Jihoon Lee <jhoon.it.lee@samsung.com>
          Tue, 28 Dec 2021 07:42:20 +0000 (16:42 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
          Wed, 29 Dec 2021 07:48:32 +0000 (16:48 +0900)
InitLayerContext now receives out connection info (true if the connection exists,
false if not) and uses it to determine whether a given output is dangled (see the
sketch after the sign-off below).

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee <jhoon.it.lee@samsung.com>
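
Below is a minimal standalone sketch of the dangled-output check this patch adds.
It is plain C++ for illustration only: the flag values and the main() harness are
hypothetical, not nntrainer API; only the InitLayerContext::requestOutputs() and
LayerNode::finalize() names mentioned in the comments come from the patch.

    #include <cstdio>
    #include <vector>

    int main() {
      // Connection info as LayerNode::finalize() would pass it: one flag per
      // requested output, true when another node actually consumes that output.
      std::vector<bool> req_out_is_connected = {true, false};

      // Mirrors the check in InitLayerContext::requestOutputs(): an output is
      // dangled when its slot has no connection info or the flag is false.
      auto is_dangled = [&](unsigned int idx) {
        return req_out_is_connected.size() <= idx || !req_out_is_connected[idx];
      };

      // Three requested output slots: slot 0 is connected, slot 1 is explicitly
      // unconnected, slot 2 has no connection info at all.
      for (unsigned int i = 0; i < 3; ++i) {
        std::printf("output %u dangled: %s\n", i, is_dangled(i) ? "yes" : "no");
      }
      return 0;
    }

With flags {true, false}, slots 1 and 2 are reported as dangled, which is where the
context drops the gradient spec and emits the ml_logw warning.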
nntrainer/layers/layer_context.cpp
nntrainer/layers/layer_context.h
nntrainer/layers/layer_node.cpp
nntrainer/layers/time_dist.cpp
test/unittest/layers/layers_golden_tests.cpp
test/unittest/layers/layers_standalone_common_tests.cpp

index f9cf8ce..d75a743 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <iterator>
 #include <layer_context.h>
+#include <nntrainer_log.h>
 #include <stdexcept>
 #include <var_grad.h>
 #include <weight.h>
@@ -38,15 +39,15 @@ static void renameSpec(VarGradSpecV2 &spec,
 }
 
 InitLayerContext::InitLayerContext(const std::vector<TensorDim> &dim,
-                                   unsigned int num_req_out, bool in_place_,
-                                   const std::string &n,
+                                   const std::vector<bool> &req_out_connected,
+                                   bool in_place_, const std::string &n,
                                    const std::string &prefix_,
                                    const float max_norm) :
   input_dim(dim),
   in_place(in_place_),
   clip_by_global_norm(max_norm),
   output_specs(),
-  num_requested_out(num_req_out),
+  req_out_is_connected(req_out_connected),
   name(n),
   prefix(prefix_) {
   NNTR_THROW_IF(!validate(), std::invalid_argument)
@@ -56,23 +57,21 @@ InitLayerContext::InitLayerContext(const std::vector<TensorDim> &dim,
     prefix = name; // default prefix is the name
 }
 
+unsigned int InitLayerContext::getNumRequestedOutputs() const {
+  return req_out_is_connected.size();
+}
+
 void InitLayerContext::setOutputDimensions(
   const std::vector<TensorDim> &out_dim) {
-  NNTR_THROW_IF(out_dim.size() < num_requested_out, std::invalid_argument)
-    << "number of output dimension set is smaller than the number of out "
-       "tensor slots "
-       "requested, num output dimensions: "
-    << out_dim.size() << " slots to fill: " << num_requested_out
-    << " context name: " << name;
-  NNTR_THROW_IF(output_specs.size(), std::invalid_argument)
-    << "output specification already set, cannot set twice. Check if output is "
-       "already requested elsewhere";
-  output_specs.reserve(out_dim.size());
+  std::vector<VarGradSpecV2> specs;
+  specs.reserve(out_dim.size());
 
   for (unsigned i = 0u, sz = out_dim.size(); i < sz; ++i) {
     auto spec = outSpec(out_dim.at(i));
-    output_specs.push_back(std::move(spec));
+    specs.push_back(std::move(spec));
   }
+
+  requestOutputs(std::move(specs));
 }
 
 VarGradSpecV2 InitLayerContext::outSpec(const TensorDim &dim,
@@ -90,18 +89,29 @@ VarGradSpecV2 InitLayerContext::outSpec(const TensorDim &dim,
 }
 
 void InitLayerContext::requestOutputs(std::vector<VarGradSpecV2> &&out_specs) {
-  NNTR_THROW_IF(out_specs.size() < num_requested_out, std::invalid_argument)
+  NNTR_THROW_IF(out_specs.size() < getNumRequestedOutputs(),
+                std::invalid_argument)
     << "number of output dimension set is smaller than the number of out "
        "tensor slots requested, num output specification: "
-    << out_specs.size() << " slots to fill: " << num_requested_out;
+    << out_specs.size() << " slots to fill: " << getNumRequestedOutputs()
+    << " context name: " << name;
   NNTR_THROW_IF(output_specs.size(), std::invalid_argument)
     << "output specification already set, cannot set twice. Check if output is "
        "already requested elsewhere";
   output_specs.reserve(out_specs.size());
 
+  auto is_dangled = [this](unsigned int idx) {
+    return req_out_is_connected.size() <= idx || !req_out_is_connected[idx];
+  };
+
   for (unsigned i = 0u, sz = out_specs.size(); i < sz; ++i) {
     auto &spec = out_specs.at(i);
     renameSpec(spec, [i](std::string &name) { name += std::to_string(i); });
+    if (is_dangled(i)) {
+      ml_logw("given output is being dangled: %s in context: %s",
+              spec.variable_spec.name.c_str(), name.c_str());
+      spec.gradient_spec = nullptr;
+    }
     output_specs.push_back(std::move(spec));
   }
 }
index f2f8cfe..0db6447 100644 (file)
@@ -41,10 +41,17 @@ public:
    * @brief Construct a new Init Layer Context object
    *
    * @param dim Input dimensions for the layer
+   * @param req_out_connected bool vector to tell if each requested output is
+   * connected to another node or not
+   * @param in_place_ true if the layer can be run in-place
+   * @param n name of the layer
+   * @param prefix_ prefix of the layer
+   * @param max_norm max norm for gradient clipping
    */
-  InitLayerContext(const std::vector<TensorDim> &dim, unsigned int num_req_out,
-                   bool in_place_, const std::string &n = "",
-                   const std::string &prefix_ = "", const float max_norm = 0.0);
+  InitLayerContext(const std::vector<TensorDim> &dim,
+                   const std::vector<bool> &req_out_connected, bool in_place_,
+                   const std::string &n = "", const std::string &prefix_ = "",
+                   const float max_norm = 0.0);
 
   /**
    * @brief   get name by the layer
@@ -65,7 +72,7 @@ public:
    *
    * @return unsigned int number of inputs
    */
-  unsigned int getNumRequestedOutputs() const { return num_requested_out; }
+  unsigned int getNumRequestedOutputs() const;
 
   /**
    * @brief Get the Input Dimensions object
@@ -230,8 +237,8 @@ public:
   /**
    * @brief create var grad specification with output default
    *
-   * @param dim dimension dimension
-   * @param name name name
+   * @param dim dimension
+   * @param name name
    * @param ls variable lifespan
    * @param grad_ls gradient lifespan
    * @return VarGradSpecV2 var grad specification
@@ -298,10 +305,10 @@ private:
     tensors_spec; /**< Specification for the var_grad (trainable/non-trainable
                      variables) */
 
-  unsigned int
-    num_requested_out; /**< number of requested outputs for the layer */
-  std::string name;    /**< name of the layer */
-  std::string prefix;  /**< prefix of the layer */
+  std::vector<bool> req_out_is_connected;
+  /**< a bool vector to tell if requested out is actually connected to others */
+  std::string name;   /**< name of the layer */
+  std::string prefix; /**< prefix of the layer */
 };
 
 /**
index 5c63924..ff8bc1c 100644 (file)
@@ -500,9 +500,14 @@ InitLayerContext LayerNode::finalize(const std::vector<TensorDim> &input_dims) {
   float max_norm = 0.0;
   if (!std::get<props::ClipGradByGlobalNorm>(*layer_node_props).empty())
     max_norm = std::get<props::ClipGradByGlobalNorm>(*layer_node_props).get();
-  auto init_context = InitLayerContext(
-    actual_input_dims, output_connections.size(),
-    executeInPlace() != InPlace::NONE, getName(), scope, max_norm);
+
+  std::vector<bool> out_info;
+  out_info.reserve(output_connections.size());
+  std::transform(output_connections.begin(), output_connections.end(),
+                 std::back_inserter(out_info), [](auto &con) { return !!con; });
+  auto init_context = InitLayerContext(actual_input_dims, out_info,
+                                       executeInPlace() != InPlace::NONE,
+                                       getName(), scope, max_norm);
 
   layer->finalize(init_context);
 
index 970fb81..4362105 100644 (file)
@@ -124,15 +124,15 @@ void TimeDistLayer::finalize(InitLayerContext &context) {
    */
   TensorDim dist_dim = input_dim;
   dist_dim.height(1);
-  InitLayerContext dist_context({dist_dim}, context.getNumRequestedOutputs(),
-                                context.executeInPlace(), context.getName());
+  InitLayerContext dist_context({dist_dim}, {}, context.executeInPlace(),
+                                context.getName());
 
   // During forwarding and backwarding, it set the input and output buffer of
   // dist_layer properly
   // dist_layer will use forwarding_with_val and backwarding_with_val
   dist_layer->finalize(dist_context);
 
-  TensorDim output_dim = dist_context.getOutputDimensions()[0];
+  TensorDim output_dim = dist_context.getOutSpecs()[0].variable_spec.dim;
   // input_dim.height is number of time iteration
   output_dim.height(input_dim.height());
   context.setOutputDimensions({output_dim});
index a8381c0..d53985f 100644 (file)
@@ -49,7 +49,7 @@ static InitLayerContext createInitContext(Layer *layer,
   std::vector<shape_parser_> parsed;
   from_string(input_shape_str, parsed);
 
-  InitLayerContext context({parsed.begin(), parsed.end()}, 1, false,
+  InitLayerContext context({parsed.begin(), parsed.end()}, {true}, false,
                            "golden_test");
   layer->finalize(context);
 
index 55a5f3e..223ae47 100644 (file)
@@ -42,7 +42,7 @@ TEST_P(LayerSemantics, finalizeValidate_p) {
   ml::train::TensorDim in_dim({1, 1, 1, 1});
   std::vector<ml::train::TensorDim> input_dims(num_inputs, in_dim);
   nntrainer::InitLayerContext init_context =
-    nntrainer::InitLayerContext(input_dims, 1, false, "layer");
+    nntrainer::InitLayerContext(input_dims, {true}, false, "layer");
   EXPECT_EQ(init_context.validate(), true);
 
   // set necessary properties only
@@ -80,7 +80,7 @@ TEST_P(LayerSemantics, setBatchValidate_p) {
   ml::train::TensorDim in_dim({1, 1, 1, 1});
   std::vector<ml::train::TensorDim> input_dims(num_inputs, in_dim);
   nntrainer::InitLayerContext init_context =
-    nntrainer::InitLayerContext(input_dims, 1, false, "layer");
+    nntrainer::InitLayerContext(input_dims, {true}, false, "layer");
   EXPECT_EQ(init_context.validate(), true);
 
   // set necessary properties only