static constexpr size_t SINGLE_INOUT_IDX = 0;
void ConcatLayer::finalize(InitLayerContext &context) {
- if (context.getNumOutputs() != 1) {
- throw std::invalid_argument(
- "Error: only a single output is supported with concat layer");
- }
-
auto &concat_dimension_prop = std::get<props::ConcatDimension>(concat_props);
/** for backward compatibility, the default concat dimension will be channel */
/// @todo this is a hacky way to force concat dimension to width if channel
#include <functional>
#include <layer_context.h>
+#include <stdexcept>
#include <var_grad.h>
#include <weight.h>
namespace nntrainer {
+void InitLayerContext::setOutputDimensions(
+ const std::vector<TensorDim> &out_dim) {
+ NNTR_THROW_IF(out_dim.size() < num_requested_out, std::invalid_argument)
+ << "number of output dimensions set is smaller than the number of out "
+ "tensor slots requested, num output dimensions: "
+ << out_dim.size() << " slots to fill: " << num_requested_out
+ << " context name: " << name;
+ output_dim = out_dim;
+}
+
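The comparison is `<` rather than `!=`, so a layer may publish more output dimensions than the graph requested slots for; only supplying fewer is an error. A minimal sketch of the call pattern this permits (hidden_dim and cell_dim are assumed names, not part of this patch):

    // an LSTM-style cell can always shape both outputs; when only one slot
    // was requested, the trailing dimension simply has no consumer
    context.setOutputDimensions({hidden_dim, cell_dim});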
RunLayerContext::RunLayerContext(const std::string &name, bool trainable,
float l, bool in_place_,
const std::vector<Weight *> &w,
*
* @param dim Input dimensions for the layer
*/
- InitLayerContext(const std::vector<TensorDim> &dim, unsigned int num_out,
+ InitLayerContext(const std::vector<TensorDim> &dim, unsigned int num_req_out,
bool in_place_, const std::string &n = "",
const std::string &prefix_ = "",
const float max_norm = 0.0) :
input_dim(dim),
in_place(in_place_),
clip_by_global_norm(max_norm),
- num_outputs(num_out),
+ num_requested_out(num_req_out),
name(n),
prefix(prefix_) {
NNTR_THROW_IF(!validate(), std::invalid_argument)
*
- * @return unsigned int number of inputs
+ * @return unsigned int number of requested outputs
*/
- unsigned int getNumOutputs() const { return num_outputs; }
+ unsigned int getNumRequestedOutputs() const { return num_requested_out; }
/**
- * @brief Get the Input Dimensions object
+ * @brief Set the Output Dimensions object
*
* @param out_dim the output dimension to set to
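+ * @note out_dim may contain more entries than the number of requested
+ * output slots; supplying fewer than requested throws std::invalid_argument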
*/
- void setOutputDimensions(const std::vector<TensorDim> &out_dim) {
- if (out_dim.size() != num_outputs)
- throw std::invalid_argument("Mismatch number of outputs");
- output_dim = out_dim;
- }
+ void setOutputDimensions(const std::vector<TensorDim> &out_dim);
/**
* @brief Request a new weight for the layer
tensors_spec; /**< Specification for the var_grad (trainable/non-trainable
variables) */
- unsigned int num_outputs; /**< number of outputs for the layer */
- std::string name; /**< name of the layer */
- std::string prefix; /**< prefix of the layer */
+ unsigned int
+ num_requested_out; /**< number of requested outputs for the layer */
+ std::string name; /**< name of the layer */
+ std::string prefix; /**< prefix of the layer */
};
/**
needs_calc_derivative(false),
needs_calc_gradient(false),
output_connections(),
- effective_output_connection_size(0),
run_context(nullptr),
layer_node_props(
new PropsType(props::Name(), props::Distribute(), props::Trainable(), {},
<< "cannot override connection, this slot is reserved for "
<< con->toString();
- effective_output_connection_size++;
con = std::make_unique<Connection>(name, index);
}
output_connections.reserve(layers.size());
std::transform(
layers.begin(), layers.end(), std::back_inserter(output_connections),
- [this](const std::string &id) { return std::make_unique<Connection>(id); });
- effective_output_connection_size = layers.size();
+ [](const std::string &id) { return std::make_unique<Connection>(id); });
}
bool LayerNode::hasInputShapeProperty() const {
layer = std::move(dlayer);
}
- /// remove flatten and activation since it's already realized
- layer_node_props_realization = std::make_unique<RealizationPropsType>(
- props::Flatten(), props::Activation());
-
- /// if intermediate node is not used anywhere, it means we need delicate
- /// handling, including interface change for init layer context because layer
- /// need to know which output is a dangling node, it is rather better to
- /// assume this is a buggy behavior
- /// if the output is possibly optional (for example, lstmcell returns hidden,
- /// cell) but cell might not be used else where. this can be easily checked by
- /// putting cell to the first output. In this case, there is no intermediate
- /// node unidentified it will pass this check while lstmcell can query by
- /// checking number of outputs from context
- NNTR_THROW_IF(getNumOutputConnections() != effective_output_connection_size,
- std::invalid_argument)
- << "Intermediate node is not used anywhere for node: " << getName()
- << " num output connection: " << getNumOutputConnections()
- << " effective_output_connection: " << effective_output_connection_size;
-
- auto num_outputs = effective_output_connection_size == 0
- ? 1
- : effective_output_connection_size;
-
auto scope = getSharedFrom().empty() ? getName() : getSharedFrom();
float max_norm = 0.0;
if (!std::get<props::ClipGradByGlobalNorm>(*layer_node_props).empty())
max_norm = std::get<props::ClipGradByGlobalNorm>(*layer_node_props).get();
- auto init_context = InitLayerContext(actual_input_dims, num_outputs,
- executeInPlace() != InPlace::NONE,
- getName(), scope, max_norm);
+ auto init_context = InitLayerContext(
+ actual_input_dims, output_connections.size(),
+ executeInPlace() != InPlace::NONE, getName(), scope, max_norm);
layer->finalize(init_context);
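With effective_output_connection_size gone, the requested-output count is simply output_connections.size(), including reserved-but-unfilled slots; a node with no consumers now requests zero outputs instead of being forced to one. The relaxed `<` check in setOutputDimensions is what keeps such nodes finalizable, leaving this invariant (illustrative, not asserted by the patch):

    // after layer->finalize(init_context), for every node:
    // init_context.getOutputDimensions().size() >=
    //   init_context.getNumRequestedOutputs()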
bool needs_calc_gradient; /**< cache if this layer needs to do calcGradient */
std::vector<std::unique_ptr<Connection>>
- output_connections; /**< output layer names */
- unsigned effective_output_connection_size; /**< effective output connection
- size, this skips not connected
- slot, so this number can be
- diffrent from num_outputs() */
+ output_connections; /**< output layer names */
std::unique_ptr<RunLayerContext>
run_context; /**< context required for running/execution of the layer. This
context.requestTensor(state_dim, "dstate", Tensor::Initializer::NONE, false,
TensorLifespan::BACKWARD_FUNC_LIFESPAN);
- if (context.getNumOutputs() == 2)
+ if (context.getNumRequestedOutputs() == 2)
context.setOutputDimensions({query_dim, state_dim});
else
context.setOutputDimensions({query_dim});
static constexpr size_t SINGLE_INOUT_IDX = 0;
void MultiOutLayer::finalize(InitLayerContext &context) {
- std::vector<TensorDim> out_dims(context.getNumOutputs());
+ std::vector<TensorDim> out_dims(context.getNumRequestedOutputs());
const TensorDim &in_dim = context.getInputDimensions()[0];
std::fill(out_dims.begin(), out_dims.end(), in_dim);
* 3. axis = 3, output_dim = [b,c,h,1], num_outputs = w
*/
const TensorDim &in_dim = context.getInputDimensions()[0];
- if (in_dim.getTensorDim(split_dimension) != context.getNumOutputs())
+ if (in_dim.getTensorDim(split_dimension) != context.getNumRequestedOutputs())
throw std::invalid_argument(
"Split dimension cannot be split into given number of outputs");
TensorDim d = in_dim;
d.setTensorDim(split_dimension, 1);
- std::vector<TensorDim> output_dim(context.getNumOutputs());
+ std::vector<TensorDim> output_dim(context.getNumRequestedOutputs());
for (auto &out_dim : output_dim) {
out_dim = d;
}
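Concretely, a worked example with assumed values: for an input of [b=2, c=3, h=4, w=5] with split_dimension = 3, the node must be wired with exactly 5 requested outputs, each receiving the collapsed dimension:

    // in_dim.getTensorDim(3) == 5, so getNumRequestedOutputs() must be 5
    // each output_dim[i] == [2, 3, 4, 1]  (width collapsed to 1)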
*/
TensorDim dist_dim = input_dim;
dist_dim.height(1);
- InitLayerContext dist_context({dist_dim}, context.getNumOutputs(),
+ InitLayerContext dist_context({dist_dim}, context.getNumRequestedOutputs(),
context.executeInPlace(), context.getName());
// During forwarding and backwarding, it set the input and output buffer of
props.push_back(input_shape);
props.push_back(input_layers);
lnode->setProperty(props);
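+ /// finalize() now sizes the requested outputs from the node's output
+ /// connections, so give the bare test node a placeholder consumer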
+ lnode->setOutputLayers({"dummy"});
EXPECT_NO_THROW(lnode->setProperty(valid_properties));
if (!must_fail) {
nntrainer::InitLayerContext init_context = lnode->finalize();
- EXPECT_EQ(init_context.getOutputDimensions().size(),
- init_context.getNumOutputs());
for (auto const &dim : init_context.getOutputDimensions())
EXPECT_GT(dim.getDataLen(), size_t(0));
props.push_back(input_shape);
props.push_back(input_layers);
lnode->setProperty(props);
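+ /// placeholder consumer, as above: finalize() counts output connections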
+ lnode->setOutputLayers({"dummy"});
EXPECT_NO_THROW(lnode->setProperty(valid_properties));
if (!must_fail) {
EXPECT_NO_THROW(layer->finalize(init_context));
- EXPECT_EQ(init_context.getOutputDimensions().size(),
- init_context.getNumOutputs());
-
for (auto const &dim : init_context.getOutputDimensions())
EXPECT_GT(dim.getDataLen(), size_t(0));
for (auto const &ws : init_context.getWeightsSpec())