// SPDX-License-Identifier: Apache-2.0
/**
 * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
 *
 * @file   layer_context.h
 * @see    https://github.com/nnstreamer/nntrainer
 * @author Parichay Kapoor <pk.kapoor@samsung.com>
 * @bug    No known bugs except for NYI items
 * @brief  This is the layer context for each layer
 */

#ifndef __LAYER_CONTEXT_H__
#define __LAYER_CONTEXT_H__
#include <bitset>
#include <map>
#include <string>
#include <tuple>
#include <vector>

#include <common_properties.h>
#include <tensor.h>
#include <tensor_dim.h>
#include <tensor_wrap_specs.h>

namespace nntrainer {

class Weight;
class Var_Grad;
/**
 * @class   InitLayerContext
 * @brief   Layer context class used during layer initialization
 *
 * @details This provides for the layer initialization. This context does not
 * contain any structures which hold allocated memory, nor does it support
 * allocating any new memory; it only stores the specifications based on
 * which memory will be allocated later.
 */
class InitLayerContext {
public:
  /**
   * @brief Construct a new Init Layer Context object
   *
   * @param dim Input dimensions for the layer
   * @param req_out_connected bool vector to tell if each requested output is
   * actually connected to others
   * @param in_place_ true if the context can be executed in-place
   * @param n name of the layer
   * @param prefix_ prefix
   * @param max_norm max norm
   */
  InitLayerContext(const std::vector<TensorDim> &dim,
                   const std::vector<bool> &req_out_connected, bool in_place_,
                   const std::string &n = "", const std::string &prefix_ = "",
                   const float max_norm = 0.0);
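
  /*
   * Example (a minimal sketch; the dimension values are illustrative, and
   * this construction is normally done by the graph rather than by a layer
   * developer):
   *
   *   std::vector<TensorDim> in_dims = {TensorDim(1, 1, 1, 10)};
   *   InitLayerContext context(in_dims, {true}, false, "fc0");
   *   // `context` is then handed to the layer's finalize(), where the layer
   *   // requests weights/tensors and sets its output dimensions.
   */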
  /**
   * @brief get name of the layer
   *
   * @return name of the layer
   */
  const std::string &getName() const { return name; }

  /**
   * @brief Get the number of inputs for the layer
   *
   * @return unsigned int number of inputs
   */
  unsigned int getNumInputs() const { return input_dim.size(); }

  /**
   * @brief Get the number of requested outputs for the layer
   *
   * @return unsigned int number of requested outputs
   */
  unsigned int getNumRequestedOutputs() const;

  /**
   * @brief Get the Input Dimensions object
   *
   * @return const std::vector<TensorDim>& Input dimensions
   */
  const std::vector<TensorDim> &getInputDimensions() const { return input_dim; }
  /**
   * @brief Set the Dim Flag to retrieve effective dimension
   *
   * @param idx index of the input dimension to set the flag for
   * @param dim_flag_ dimension bit to calculate, rightmost is width
   */
  void
  setEffDimFlagInputDimension(unsigned int idx,
                              const std::bitset<TensorDim::MAXDIM> &dim_flag_) {
    input_dim[idx].setEffDimFlag(dim_flag_);
  }

  /**
   * @brief Set the dynamic Dim Flag to retrieve dynamic dimension (that can
   * change during running)
   *
   * @param idx index of the input dimension to set the flag for
   * @param dim_flag_ dimension bit to calculate, rightmost is width
   */
  void
  setDynDimFlagInputDimension(unsigned int idx,
                              const std::bitset<TensorDim::MAXDIM> &dim_flag_) {
    input_dim[idx].setDynDimFlag(dim_flag_);
  }
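
  /*
   * Example (a minimal sketch; the bit order is an assumption from
   * "rightmost is width", i.e. |batch|channel|height|width|): to treat a
   * 4:1:10:1 input as an effective 4:10 dimension, keep only the batch and
   * height bits.
   *
   *   context.setEffDimFlagInputDimension(0, 0b1010);
   */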
  /**
   * @brief Set the Output Dimensions object
   *
   * @param out_dim the output dimension to set to
   */
  void setOutputDimensions(const std::vector<TensorDim> &out_dim);
  /**
   * @brief Request a new weight for the layer
   *
   * @param dim dimension of the weight
   * @param init initializer for the weight
   * @param reg regularizer for the weight
   * @param reg_const regularization constant for the weight
   * @param decay decay constant for the weight
   * @param name name of the weight
   * @param trainable if the weight is trainable (requires gradient or not)
   * @return unsigned int index of the weight for its getter
   *
   * @todo Consider providing a guarantee that the returned indices will always
   * start from 0 and will always be incremental.
   */
  unsigned int requestWeight(const TensorDim &dim,
                             const Tensor::Initializer init,
                             const WeightRegularizer reg, const float reg_const,
                             const float decay, const std::string &name,
                             bool trainable = true) {
    weights_spec.emplace_back(dim, init, reg, reg_const, decay,
                              clip_by_global_norm, trainable,
                              prefix + ":" + name);
    return weights_spec.size() - 1;
  }
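
  /*
   * Example (a minimal sketch of a layer's finalize(); `unit` and
   * `weight_idx` are hypothetical layer members):
   *
   *   void MyLayer::finalize(InitLayerContext &context) {
   *     const TensorDim &in = context.getInputDimensions()[0];
   *     TensorDim w_dim(1, 1, in.width(), unit);
   *     weight_idx = context.requestWeight(
   *       w_dim, Tensor::Initializer::XAVIER_UNIFORM,
   *       WeightRegularizer::NONE, 1.0f, 0.0f, "weight", true);
   *   }
   */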
  /**
   * @brief Request a new weight for the layer
   *
   * @param spec weight spec
   * @return unsigned int index of the weight for its getter
   *
   * @todo Consider providing a guarantee that the returned indices will always
   * start from 0 and will always be incremental.
   */
  unsigned int requestWeight(const WeightSpec &spec) {
    weights_spec.emplace_back(spec);
    return weights_spec.size() - 1;
  }
  /**
   * @brief Request a new tensor for the layer
   *
   * @param dim dimension of the tensor
   * @param name name of the tensor
   * @param init initializer for the tensor
   * @param trainable if the tensor is trainable (requires gradient or not)
   * @param lifespan lifespan of the tensor
   * @param private_ if the tensor should not be shared, and is for sole use
   * only
   * @return unsigned int index of the tensor for its getter
   *
   * @todo Consider providing a guarantee that the returned indices will always
   * start from 0 and will always be incremental.
   */
  unsigned int
  requestTensor(const TensorDim &dim, const std::string &name,
                const Tensor::Initializer init = Tensor::Initializer::NONE,
                bool trainable = false,
                TensorLifespan lifespan = TensorLifespan::ITERATION_LIFESPAN,
                bool private_ = true) {
    auto prefix_ = private_ ? this->name : this->prefix;
    tensors_spec.emplace_back(dim, init, trainable, prefix_ + ":" + name,
                              lifespan);
    return tensors_spec.size() - 1;
  }
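
  /*
   * Example (a minimal sketch; `batch`, `unit` and `hidden_idx` are
   * hypothetical): request a scratch tensor that only lives through the
   * forward call.
   *
   *   hidden_idx = context.requestTensor(
   *     TensorDim(batch, 1, 1, unit), "hidden", Tensor::Initializer::NONE,
   *     false, TensorLifespan::FORWARD_FUNC_LIFESPAN);
   */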
  /**
   * @brief Specification of the tensors
   */
  typedef VarGradSpec TensorSpec;
  /**
   * @brief Request a new tensor for the layer
   *
   * @param spec tensor spec
   * @return unsigned int index of the tensor for its getter
   *
   * @todo Consider providing a guarantee that the returned indices will always
   * start from 0 and will always be incremental.
   */
  unsigned int requestTensor(const TensorSpec &spec) {
    tensors_spec.emplace_back(spec);
    return tensors_spec.size() - 1;
  }
  /**
   * @brief Get the current weights spec
   *
   * @return The current weights spec
   */
  const std::vector<WeightSpec> &getWeightsSpec() const { return weights_spec; }

  /**
   * @brief Get the number of requested weights
   *
   * @return The current number of requested weights
   */
  unsigned int getNumWeights() const { return weights_spec.size(); }

  /**
   * @brief Get the current tensors spec
   *
   * @return The current tensors spec
   */
  const std::vector<TensorSpec> &getTensorsSpec() const { return tensors_spec; }

  /**
   * @brief Get the number of requested tensors
   *
   * @return unsigned int number of requested tensors
   */
  unsigned int getNumTensors() const { return tensors_spec.size(); }
  /**
   * @brief create var grad specification with output default
   *
   * @param dim dimension
   * @param name name of the output
   * @param ls variable lifespan
   * @param grad_ls gradient lifespan
   * @return VarGradSpecV2 var grad specification
   */
  static VarGradSpecV2
  outSpec(const TensorDim &dim, const std::string &name = "out",
          TensorLifespan ls = TensorLifespan::FORWARD_FUNC_LIFESPAN,
          TensorLifespan grad_ls = TensorLifespan::CALC_GRAD_DERIV_LIFESPAN);
  /**
   * @brief request outputs
   *
   * @param out_specs pack of output specifications; names will be
   * automatically indexed to prevent name clashes
   */
  void requestOutputs(std::vector<VarGradSpecV2> &&out_specs);
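
  /*
   * Example (a minimal sketch): declare a single output whose dimension
   * equals the input dimension.
   *
   *   std::vector<VarGradSpecV2> out_specs;
   *   out_specs.push_back(
   *     InitLayerContext::outSpec(context.getInputDimensions()[0]));
   *   context.requestOutputs(std::move(out_specs));
   */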
  /**
   * @brief Get the Out Specs object
   *
   * @return std::vector<VarGradSpecV2> out specification
   */
  const std::vector<VarGradSpecV2> &getOutSpecs() const;
  /**
   * @brief Validate the context
   *
   * @return true if validated, else false
   * @note this must be called before passing a context to a layer for
   * finalization
   */
  bool validate() {
    if (input_dim.empty()) {
      return false;
    }

    for (auto const &dim : input_dim) {
      if (dim.getDataLen() == 0) {
        return false;
      }
    }

    return true;
  }
  /**
   * @brief check if the layer is expected to run in-place
   *
   * @return true if in-place, else false
   */
  bool executeInPlace() const { return in_place; }

private:
  std::vector<TensorDim> input_dim; /**< Input dimensions for the layer */
  bool in_place;             /**< if the layer is expected to run in-place */
  float clip_by_global_norm; /**< max norm value for clip by norm */

  std::vector<VarGradSpecV2> output_specs; /**< Specification for the output */
  std::vector<WeightSpec> weights_spec;    /**< Specification for the weights */
  std::vector<TensorSpec>
    tensors_spec; /**< Specification for the var_grad (trainable/non-trainable
                     variables) */

  std::vector<bool> req_out_is_connected;
  /**< a bool vector to tell if requested out is actually connected to others */
  std::string name;   /**< name of the layer */
  std::string prefix; /**< prefix of the layer */
};
/**
 * @class   RunLayerContext
 * @brief   Layer context class used while the layer is running
 *
 * @details This provides for the layer execution. Unlike InitLayerContext,
 * this context contains structures whose memory is already allocated and
 * gives the layer access to them; the specifications stored during
 * initialization have been realized into tensors by this point.
 *
 * @todo Check the caller of the getTensor() and set restrictions on the
 * tensors to be accessed based on which function is requesting it.
 */
class RunLayerContext {
public:
  /**
   * @brief Construct a new Run Layer Context object
   */
  RunLayerContext() : loss(0.0), in_place(false) {}

  /**
   * @brief Construct a new Run Layer Context object
   *
   * @param name name of the layer
   * @param in_place_ execution in-place of the layer
   */
  RunLayerContext(const std::string &name, bool in_place_) : RunLayerContext() {
    in_place = in_place_;
    std::get<props::Name>(props).set(name);
  }
  /**
   * @brief Construct a new Run Layer Context object
   *
   * @param name name of the layer
   * @param trainable if the layer is trainable
   * @param l loss of the layer
   * @param in_place_ execution in-place of the layer
   * @param w weights of the layer
   * @param in inputs of the layer
   * @param out outputs of the layer
   * @param t extra tensors of the layer
   */
  RunLayerContext(const std::string &name, bool trainable, float l,
                  bool in_place_, const std::vector<Weight *> &w,
                  const std::vector<Var_Grad *> &in,
                  const std::vector<Var_Grad *> &out,
                  const std::vector<Var_Grad *> &t);
  /**
   * @brief Get the Weight tensor object
   *
   * @param idx Identifier of the weight
   * @return Tensor& Reference to the weight tensor
   */
  Tensor &getWeight(unsigned int idx) const;

  /**
   * @brief Get the Weight Gradient tensor object
   *
   * @note this method returns the fresh gradient to be filled
   * @param idx Identifier of the weight
   * @return Tensor& Reference to the weight grad tensor
   */
  Tensor &getWeightGrad(unsigned int idx) const;

  /**
   * @brief Get the Weight Optimizer Variable tensor object
   *
   * @param idx Identifier of the weight
   * @param jdx Identifier of the weight optimizer variable
   * @return Tensor& Reference to the weight optimizer variable tensor
   */
  Tensor &getWeightOptVar(unsigned int idx, unsigned int jdx) const;

  /**
   * @brief Get the Weight name
   *
   * @param idx Identifier of the weight
   * @return name of the weight
   */
  const std::string &getWeightName(unsigned int idx) const;

  /**
   * @brief check if the weight has gradient
   *
   * @param idx Identifier of the weight
   * @return true if weight has gradient, else false
   */
  bool weightHasGradient(unsigned int idx) const;
  /**
   * @brief Get the Output tensor object
   *
   * @param idx Identifier of the output
   * @return Tensor& Reference to the output tensor
   */
  Tensor &getOutput(unsigned int idx);

  /**
   * @brief Get the Output tensor object
   *
   * @param idx Identifier of the output
   * @return Tensor& Reference to the output tensor
   */
  const Tensor &getOutput(unsigned int idx) const;
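
  /*
   * Example (a minimal sketch of a forward pass; `weight_idx` is a
   * hypothetical layer member, and dot() accumulating into its result
   * argument is an assumption):
   *
   *   void MyLayer::forwarding(RunLayerContext &context, bool training) {
   *     Tensor &input = context.getInput(0);
   *     Tensor &weight = context.getWeight(weight_idx);
   *     Tensor &output = context.getOutput(0);
   *     input.dot(weight, output); // output = input . weight
   *   }
   */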
  /**
   * @brief Get the Output Grad tensor object
   *
   * @param idx Identifier of the output
   * @return Read-only output grad tensor; if the derivative does not have a
   * gradient, a temporary tensor initialized to zero is returned
   */
  const Tensor getOutputGrad(unsigned int idx) const;

  /**
   * @brief Get the Output Grad tensor object
   *
   * @param idx Identifier of the output
   * @return Tensor& Reference to the output grad tensor; this is valid only
   * if the given output is trainable
   *
   * @note recommended to NOT use this function as a layer developer but
   * rather use getOutputGrad().
   */
  Tensor &getOutputGradUnsafe(unsigned int idx);

  /**
   * @brief check if the output has gradient
   *
   * @param idx Identifier of the output
   * @return true if output has gradient, else false
   */
  bool outputHasGradient(unsigned int idx) const;
  /**
   * @brief Get the incoming Derivative tensor object
   *
   * @param idx Identifier of the output
   * @return Tensor output derivative tensor; if the derivative does not have
   * a gradient, a temporary tensor initialized to zero is returned
   */
  const Tensor getIncomingDerivative(unsigned int idx) const;

  /**
   * @brief Get the Input tensor object
   *
   * @param idx Identifier of the input
   * @return Tensor& Reference to the input tensor
   */
  Tensor &getInput(unsigned int idx);

  /**
   * @brief Get the Input tensor object
   *
   * @param idx Identifier of the input
   * @return Tensor& Reference to the input tensor
   */
  const Tensor &getInput(unsigned int idx) const;
  /**
   * @brief Get the Input Grad tensor object
   *
   * @param idx Identifier of the input
   * @return Tensor& Reference to the input grad tensor
   */
  Tensor &getInputGrad(unsigned int idx);

  /**
   * @brief check if the input has gradient
   *
   * @param idx Identifier of the input
   * @return true if input has gradient, else false
   */
  bool inputHasGradient(unsigned int idx) const;

  /**
   * @brief Get the outgoing Derivative tensor object
   *
   * @param idx Identifier of the input
   * @return Tensor& Reference to the input derivative tensor
   */
  Tensor &getOutgoingDerivative(unsigned int idx);
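
  /*
   * Example (a minimal sketch of a backward pass; `weight_idx` is a
   * hypothetical layer member, and the dot() transpose flags are an
   * assumption):
   *
   *   void MyLayer::calcDerivative(RunLayerContext &context) {
   *     const Tensor deriv = context.getIncomingDerivative(0);
   *     Tensor &weight = context.getWeight(weight_idx);
   *     Tensor &ret = context.getOutgoingDerivative(0);
   *     deriv.dot(weight, ret, false, true); // ret = deriv . weight^T
   *   }
   */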
  /**
   * @brief Get the Tensor object
   *
   * @param idx Identifier of the tensor
   * @return Tensor& Reference to the tensor
   */
  Tensor &getTensor(unsigned int idx);

  /**
   * @brief Get the Tensor object
   *
   * @param idx Identifier of the tensor
   * @return Tensor& Reference to the tensor
   */
  const Tensor &getTensor(unsigned int idx) const;

  /**
   * @brief Get the Tensor Grad object
   *
   * @param idx Identifier of the tensor
   * @return Tensor& Reference to the tensor grad tensor
   */
  Tensor &getTensorGrad(unsigned int idx);

  /**
   * @brief Get the Tensor Grad object
   *
   * @param idx Identifier of the tensor
   * @return Tensor& Reference to the tensor grad tensor
   */
  const Tensor &getTensorGrad(unsigned int idx) const;

  /**
   * @brief check if the tensor has gradient
   *
   * @param idx Identifier of the tensor
   * @return true if tensor has gradient, else false
   */
  bool tensorHasGradient(unsigned int idx) const;
  /**
   * @brief check if the weight is borrowed from others, so it is dependent
   *
   * @param idx Identifier of the weight
   * @return bool true if the weight is borrowed from outside
   */
  bool isWeightDependent(unsigned int idx) const;

  /**
   * @brief check if the current access to the gradient is the first access
   * @note for now, this is equivalent to the weight's first access, so this
   * value is accessible for non-trainable weights as well. This is in terms
   * of execution order.
   *
   * @param idx Identifier of the weight
   * @return bool true if first access
   */
  bool isGradientFirstAccess(unsigned int idx) const;

  /**
   * @brief check if the current access to the gradient is the last access
   * @note for now, this is equivalent to the weight's last access, so this
   * value is accessible for non-trainable weights as well. This is in terms
   * of execution order.
   *
   * @param idx Identifier of the weight
   * @return bool true if last access
   */
  bool isGradientLastAccess(unsigned int idx) const;

  /**
   * @brief check if the gradient is to be clipped by global norm
   *
   * @param idx Identifier of the weight
   * @return bool true if it is to be clipped, else false
   */
  bool isGradientClipByGlobalNorm(unsigned int idx) const;
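
  /*
   * Example (a minimal sketch of calcGradient for a possibly shared weight;
   * names are illustrative, and dot() taking a beta argument where beta = 1
   * accumulates is an assumption): overwrite the gradient on the first
   * access and accumulate into it afterwards.
   *
   *   const Tensor deriv = context.getIncomingDerivative(0);
   *   Tensor &input = context.getInput(0);
   *   Tensor &djdw = context.getWeightGrad(weight_idx);
   *   if (context.isGradientFirstAccess(weight_idx)) {
   *     input.dot(deriv, djdw, true, false);       // djdw = input^T . deriv
   *   } else {
   *     input.dot(deriv, djdw, true, false, 1.0f); // accumulate
   *   }
   */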
  /**
   * @brief Get the tensor name
   *
   * @param idx Identifier of the tensor
   * @return name of the tensor
   */
  const std::string &getTensorName(unsigned int idx) const;
  /**
   * @brief Get the number of output tensors
   *
   * @return unsigned int number of output tensors
   */
  unsigned int getNumOutputs() const { return outputs.size(); }

  /**
   * @brief Get the number of input tensors
   *
   * @return unsigned int number of input tensors
   */
  unsigned int getNumInputs() const { return inputs.size(); }

  /**
   * @brief Get the number of weight tensors
   *
   * @return unsigned int number of weight tensors
   */
  unsigned int getNumWeights() const { return weights.size(); }

  /**
   * @brief Get the number of optimizer variables for a weight
   *
   * @param idx Identifier of the weight
   * @return unsigned int number of optimizer variables for the weight
   */
  unsigned int getNumWeightOptVar(unsigned int idx) const;

  /**
   * @brief Get the number of requested tensors
   *
   * @return unsigned int number of requested tensors
   */
  unsigned int getNumTensors() const { return tensors.size(); }
  /**
   * @brief Set the batch size for the run context
   *
   * @param batch Updated batch size
   */
  void setBatch(unsigned int batch);

  /**
   * @brief Update the batch size of a requested tensor
   *
   * @param idx index of the tensor (identifier)
   * @param batch Updated batch size
   */
  void updateTensor(unsigned int idx, unsigned int batch);
  /**
   * @brief Get weight object for the weights
   *
   * @param idx index of the weight (identifier)
   * @return weight object
   */
  Weight &getWeightObject(unsigned int idx);

  /**
   * @brief check if the label is available
   *
   * @param idx Identifier of the input
   * @return true if label is available, else false
   */
  bool isLabelAvailable(unsigned int idx) const;

  /**
   * @brief Get label tensor
   *
   * @param idx Identifier of the input
   * @return Tensor& Reference to the label tensor
   */
  Tensor &getLabel(unsigned int idx);
  /**
   * @brief update loss by the layer
   *
   * @param val updated loss value
   * @note loss value is only used for loss layers. For non-loss layers,
   * setting this value will not change the behavior of the model.
   */
  void setLoss(float val) { loss = val; }
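
  /*
   * Example (a minimal sketch of a loss layer's forward pass; names and
   * `loss_value` are hypothetical):
   *
   *   void MyLossLayer::forwarding(RunLayerContext &context, bool training) {
   *     Tensor &y = context.getInput(0);
   *     if (context.isLabelAvailable(0)) {
   *       const Tensor &label = context.getLabel(0);
   *       // ... compute loss_value from y and label ...
   *       context.setLoss(loss_value);
   *     }
   *   }
   */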
  /**
   * @brief get loss of the layer
   *
   * @return loss of the layer
   * @note does not include the regularization loss.
   */
  float getLoss() const { return loss; }
  /**
   * @brief get regularization loss of the layer
   *
   * @return regularization loss of the layer
   */
  float getRegularizationLoss() const {
    float loss_ = 0;
    for (unsigned int idx = 0; idx < getNumWeights(); idx++) {
      loss_ += getWeightRegularizationLoss(idx);
    }
    return loss_;
  }
  /**
   * @brief get name of the layer
   *
   * @return name of the layer
   */
  const std::string &getName() const { return std::get<props::Name>(props); }

  /**
   * @brief get whether the layer is trainable
   *
   * @return trainable of the layer
   */
  bool getTrainable() const { return std::get<props::Trainable>(props); }

  /**
   * @brief check if run context is set and is ready to use
   *
   * @return true if ready, else false
   */
  bool readyToUse() const;

  /**
   * @brief validates the run context after run
   *
   * @param skip_input skip verifying the input
   * @param skip_label skip verifying the label
   *
   * @return true if valid, else false
   */
  bool validate(bool skip_input = false, bool skip_label = false);

  /**
   * @brief check if the layer is expected to run in-place
   *
   * @return true if in-place, else false
   */
  bool executeInPlace() const { return in_place; }
private:
  std::tuple<props::Name, props::Trainable> props; /**< props of the layer */
  float loss;    /**< loss of the layer */
  bool in_place; /**< if the layer is expected to run in-place */

  std::vector<Weight *> weights;   /**< weights of the layer */
  std::vector<Var_Grad *> inputs;  /**< inputs of the layer */
  std::vector<Var_Grad *> outputs; /**< outputs of the layer */
  std::vector<Var_Grad *> tensors; /**< tensors of the layer */

  std::map<std::string, const void *>
    tensor_map; /**< map of tensor name to tensor address */
  /**
   * @brief Get regularization loss for the weight
   *
   * @param idx Identifier of the weight
   * @return float Value of the loss
   */
  float getWeightRegularizationLoss(unsigned int idx) const;
};

} // namespace nntrainer

#endif // __LAYER_CONTEXT_H__