#include <iterator>
#include <layer_context.h>
+#include <nntrainer_log.h>
#include <stdexcept>
#include <var_grad.h>
#include <weight.h>
}
InitLayerContext::InitLayerContext(const std::vector<TensorDim> &dim,
- unsigned int num_req_out, bool in_place_,
- const std::string &n,
+ const std::vector<bool> &req_out_connected,
+ bool in_place_, const std::string &n,
const std::string &prefix_,
const float max_norm) :
input_dim(dim),
in_place(in_place_),
clip_by_global_norm(max_norm),
output_specs(),
- num_requested_out(num_req_out),
+ req_out_is_connected(req_out_connected),
name(n),
prefix(prefix_) {
  NNTR_THROW_IF(!validate(), std::invalid_argument)
    << "invalid init context, name: " << name;

  if (prefix.empty())
    prefix = name; // default prefix is the name
}
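+// the number of requested outputs is implied by the connectivity vector:
+// one output slot per entry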
+unsigned int InitLayerContext::getNumRequestedOutputs() const {
+ return req_out_is_connected.size();
+}
+
void InitLayerContext::setOutputDimensions(
const std::vector<TensorDim> &out_dim) {
- NNTR_THROW_IF(out_dim.size() < num_requested_out, std::invalid_argument)
- << "number of output dimension set is smaller than the number of out "
- "tensor slots "
- "requested, num output dimensions: "
- << out_dim.size() << " slots to fill: " << num_requested_out;
- NNTR_THROW_IF(output_specs.size(), std::invalid_argument)
- << "output specification already set, cannot set twice. Check if output is "
- "already requested elsewhere";
- output_specs.reserve(out_dim.size());
+ std::vector<VarGradSpecV2> specs;
+ specs.reserve(out_dim.size());
for (unsigned i = 0u, sz = out_dim.size(); i < sz; ++i) {
auto spec = outSpec(out_dim.at(i));
- output_specs.push_back(std::move(spec));
+ specs.push_back(std::move(spec));
}
+
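+  // route through requestOutputs() so slot-count validation and
+  // dangling-output handling happen in one place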
+ requestOutputs(std::move(specs));
}
VarGradSpecV2 InitLayerContext::outSpec(const TensorDim &dim,
}
void InitLayerContext::requestOutputs(std::vector<VarGradSpecV2> &&out_specs) {
- NNTR_THROW_IF(out_specs.size() < num_requested_out, std::invalid_argument)
+ NNTR_THROW_IF(out_specs.size() < getNumRequestedOutputs(),
+ std::invalid_argument)
<< "number of output dimension set is smaller than the number of out "
"tensor slots requested, num output specification: "
- << out_specs.size() << " slots to fill: " << num_requested_out;
+ << out_specs.size() << " slots to fill: " << getNumRequestedOutputs()
+ << " context name: " << name;
NNTR_THROW_IF(output_specs.size(), std::invalid_argument)
<< "output specification already set, cannot set twice. Check if output is "
"already requested elsewhere";
output_specs.reserve(out_specs.size());
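+  // an output slot is dangled when nothing consumes it: either the
+  // connectivity vector has no entry for the index or the entry is false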
+ auto is_dangled = [this](unsigned int idx) {
+    return req_out_is_connected.size() <= idx || !req_out_is_connected[idx];
+ };
+
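+  // the slot index is appended to each spec name below, keeping the outputs
+  // of a single layer uniquely named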
for (unsigned i = 0u, sz = out_specs.size(); i < sz; ++i) {
auto &spec = out_specs.at(i);
renameSpec(spec, [i](std::string &name) { name += std::to_string(i); });
+ if (is_dangled(i)) {
+ ml_logw("given output is being dangled: %s in context: %s",
+ spec.variable_spec.name.c_str(), name.c_str());
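+      // nothing consumes a dangled output, so no gradient can flow back;
+      // drop the gradient spec so no gradient tensor is requested for it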
+ spec.gradient_spec = nullptr;
+ }
output_specs.push_back(std::move(spec));
}
}
* @brief Construct a new Init Layer Context object
*
* @param dim Input dimensions for the layer
+ * @param req_out_connected bool vector to tell if each requested output is
+ * actually connected to others
+ * @param in_place_ true if the layer is expected to run in-place
+ * @param n name of the layer
+ * @param prefix_ prefix of the layer
+ * @param max_norm max norm to clip the gradient by
*/
- InitLayerContext(const std::vector<TensorDim> &dim, unsigned int num_req_out,
- bool in_place_, const std::string &n = "",
- const std::string &prefix_ = "", const float max_norm = 0.0);
+ InitLayerContext(const std::vector<TensorDim> &dim,
+ const std::vector<bool> &req_out_connected, bool in_place_,
+ const std::string &n = "", const std::string &prefix_ = "",
+ const float max_norm = 0.0);
/**
 * @brief Get the number of requested outputs
 *
 * @return unsigned int number of requested outputs
*/
- unsigned int getNumRequestedOutputs() const { return num_requested_out; }
+ unsigned int getNumRequestedOutputs() const;
/**
* @brief Get the Input Dimensions object
/**
* @brief create var grad specification with output default
*
- * @param dim dimension dimension
- * @param name name name
+ * @param dim dimension
+ * @param name name
* @param ls variable lifespan
* @param grad_ls gradient lifespan
* @return VarGradSpecV2 var grad specification
tensors_spec; /**< Specification for the var_grad (trainable/non-trainable
variables) */
- unsigned int
- num_requested_out; /**< number of requested outputs for the layer */
- std::string name; /**< name of the layer */
- std::string prefix; /**< prefix of the layer */
+  std::vector<bool>
+    req_out_is_connected; /**< a bool vector to tell if the requested output
+                             is actually connected to others */
+ std::string name; /**< name of the layer */
+ std::string prefix; /**< prefix of the layer */
};
/**
float max_norm = 0.0;
if (!std::get<props::ClipGradByGlobalNorm>(*layer_node_props).empty())
max_norm = std::get<props::ClipGradByGlobalNorm>(*layer_node_props).get();
- auto init_context = InitLayerContext(
- actual_input_dims, output_connections.size(),
- executeInPlace() != InPlace::NONE, getName(), scope, max_norm);
+
+ std::vector<bool> out_info;
+ out_info.reserve(output_connections.size());
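+  // an unset connection has no consumer; !!con collapses each connection
+  // handle to a bool telling whether the output slot is connected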
+ std::transform(output_connections.begin(), output_connections.end(),
+ std::back_inserter(out_info), [](auto &con) { return !!con; });
+ auto init_context = InitLayerContext(actual_input_dims, out_info,
+ executeInPlace() != InPlace::NONE,
+ getName(), scope, max_norm);
layer->finalize(init_context);
*/
TensorDim dist_dim = input_dim;
dist_dim.height(1);
- InitLayerContext dist_context({dist_dim}, context.getNumRequestedOutputs(),
- context.executeInPlace(), context.getName());
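+  // dist_layer exposes a single output whose dimension is read below, so
+  // mark that one slot as connected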
+  InitLayerContext dist_context({dist_dim}, {true}, context.executeInPlace(),
+                                context.getName());
  // During forwarding and backwarding, TimeDistLayer sets the input and
  // output buffers of dist_layer properly
// dist_layer will use forwarding_with_val and backwarding_with_val
dist_layer->finalize(dist_context);
- TensorDim output_dim = dist_context.getOutputDimensions()[0];
+ TensorDim output_dim = dist_context.getOutSpecs()[0].variable_spec.dim;
// input_dim.height is number of time iteration
output_dim.height(input_dim.height());
context.setOutputDimensions({output_dim});
std::vector<shape_parser_> parsed;
from_string(input_shape_str, parsed);
- InitLayerContext context({parsed.begin(), parsed.end()}, 1, false,
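+  // {true} requests a single output slot and marks it as connected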
+ InitLayerContext context({parsed.begin(), parsed.end()}, {true}, false,
"golden_test");
layer->finalize(context);
ml::train::TensorDim in_dim({1, 1, 1, 1});
std::vector<ml::train::TensorDim> input_dims(num_inputs, in_dim);
nntrainer::InitLayerContext init_context =
- nntrainer::InitLayerContext(input_dims, 1, false, "layer");
+ nntrainer::InitLayerContext(input_dims, {true}, false, "layer");
EXPECT_EQ(init_context.validate(), true);
// set necessary properties only
ml::train::TensorDim in_dim({1, 1, 1, 1});
std::vector<ml::train::TensorDim> input_dims(num_inputs, in_dim);
nntrainer::InitLayerContext init_context =
- nntrainer::InitLayerContext(input_dims, 1, false, "layer");
+ nntrainer::InitLayerContext(input_dims, {true}, false, "layer");
EXPECT_EQ(init_context.validate(), true);
// set necessary properties only