nntrainer/layers/layer_context.h

   1 // SPDX-License-Identifier: Apache-2.0
   2 /**
   3  * Copyright (C) 2021 Parichay Kapoor <pk.kapoor@samsung.com>
   4  *
   5  * @file   layer_context.h
   6  * @date   10 June 2021
   7  * @see    https://github.com/nnstreamer/nntrainer
   8  * @author Parichay Kapoor <pk.kapoor@samsung.com>
   9  * @bug    No known bugs except for NYI items
  10  * @brief  This is the layer context for each layer
  11  */
  12
  13 #ifndef __LAYER_CONTEXT_H__
  14 #define __LAYER_CONTEXT_H__
  15
  16 #include <memory>
  17 #include <vector>
  18
  19 #include <common_properties.h>
  20 #include <tensor.h>
  21 #include <tensor_dim.h>
  22 #include <tensor_wrap_specs.h>
  23
  24 namespace nntrainer {
  25
  26 class Weight;
  27 class Var_Grad;
  28
  29 /**
  30  * @class   InitLayerContext
  31  * @brief   Class for the layer context used at layer initialization
  32  *
  33  * @details This provides for the layer initialization. This context will not
  34  * contain any structures which allow allocation of memory or support to
  35  * allocate any new memory, but rather only support storing specifications based
  36  * on which memory will be allocated later.
  37  */
  38 class InitLayerContext {
  39 public:
  40   /**
  41    * @brief Construct a new Init Layer Context object
  42    *
  43    * @param dim Input dimensions for the layer
  44    * @param req_out_connected bool vector to tell if each requested output is
  45    * actually connected to others
  46    * @param in_place_ true if the layer is expected to run in-place
  47    * @param n name of the layer
  48    * @param prefix_ prefix of the layer
  49    * @param max_norm max norm (presumably the max norm value for clip by norm)
  50    */
  51   InitLayerContext(const std::vector<TensorDim> &dim,
  52                    const std::vector<bool> &req_out_connected, bool in_place_,
  53                    const std::string &n = "", const std::string &prefix_ = "",
  54                    const float max_norm = 0.0);
  55
  56   /**
  57    * @brief   get name by the layer
  58    *
  59    * @return name of the layer
  60    */
  61   const std::string &getName() const { return name; }
  62
  63   /**
  64    * @brief Get the number of inputs for the layer
  65    *
  66    * @return unsigned int number of inputs
  67    */
  68   unsigned int getNumInputs() const { return input_dim.size(); }
  69
  70   /**
  71    * @brief Get the number of requested outputs for the layer
  72    *
  73    * @return unsigned int number of requested outputs
  74    */
  75   unsigned int getNumRequestedOutputs() const;
  76
  77   /**
  78    * @brief Get the Input Dimensions object
  79    *
  80    * @return const std::vector<TensorDim>& Input dimensions
  81    */
  82   const std::vector<TensorDim> &getInputDimensions() const { return input_dim; }
  83
  84   /**
  85    * @brief Set the Dim Flag to retrieve effective dimension
  86    * @param idx index of the input dimension to update (not range-checked)
  87    * @param dim_flag_ dimension bit to calculate, rightmost is width
  88    */
  89   void
  90   setEffDimFlagInputDimension(unsigned int idx,
  91                               const std::bitset<TensorDim::MAXDIM> &dim_flag_) {
  92     input_dim[idx].setEffDimFlag(dim_flag_);
  93   }
  94
  95   /**
  96    * @brief Set the dynamic Dim Flag to retrieve dynamic dimension (that can
  97    * change during running)
  98    * @param idx index of the input dimension to update (not range-checked)
  99    * @param dim_flag_ dimension bit to calculate, rightmost is width
 100    */
 101   void
 102   setDynDimFlagInputDimension(unsigned int idx,
 103                               const std::bitset<TensorDim::MAXDIM> &dim_flag_) {
 104     input_dim[idx].setDynDimFlag(dim_flag_);
 105   }
 106
 107   /**
 108    * @brief Set the Output Dimensions object
 109    *
 110    * @param out_dim the output dimension to set to
 111    */
 112   void setOutputDimensions(const std::vector<TensorDim> &out_dim);
 113
 114   /**
 115    * @brief Request a new weight for the layer
 116    *
 117    * @param dim dimension of the weight
 118    * @param init initializer for the weight
 119    * @param reg regularizer for the weight
 120    * @param reg_const regularization constant for the weight
 121    * @param decay decay constant for the weight (forwarded to the weight spec)
 122    * @param name name of the weight, stored as "<prefix>:<name>"
 123    * @param trainable if the weight is trainable (require gradient or not)
 124    * @return unsigned int index of the weight for its getter
 125    * @todo Consider providing a guarantee that the returned indices will always
 126    * start from 0 and will always be incremental.
 127    */
 128   unsigned int requestWeight(const TensorDim &dim,
 129                              const Tensor::Initializer init,
 130                              const WeightRegularizer reg, const float reg_const,
 131                              const float decay, const std::string &name,
 132                              bool trainable = true) {
 133     weights_spec.emplace_back(dim, init, reg, reg_const, decay,
 134                               clip_by_global_norm, trainable,
 135                               prefix + ":" + name);
 136     return weights_spec.size() - 1;
 137   }
 138
 139   /**
 140    * @brief Request a new weight for the layer
 141    *
 142    * @param spec weight spec, stored as given
 143    * @return unsigned int index of the weight for its getter
 144    *
 145    * @todo Consider providing a guarantee that the returned indices will always
 146    * start from 0 and will always be incremental.
 147    */
 148   unsigned int requestWeight(const WeightSpec &spec) {
 149     weights_spec.emplace_back(spec);
 150     return weights_spec.size() - 1;
 151   }
 152
 153   /**
 154    * @brief Request a new tensor for the layer
 155    *
 156    * @param dim dimension of the tensor
 157    * @param name name of the tensor (prefixed with the layer name or prefix)
 158    * @param init initializer for the tensor
 159    * @param trainable if the tensor is trainable (require gradient or not)
 160    * @param lifespan lifespan of the tensor
 161    * @param private_ if the tensor is private (not shared, only for sole use)
 162    * @return unsigned int index of the tensor for its getter
 163    * @todo Consider providing a guarantee that the returned indices will always
 164    * start from 0 and will always be incremental.
 165    */
 166   unsigned int
 167   requestTensor(const TensorDim &dim, const std::string &name,
 168                 const Tensor::Initializer init = Tensor::Initializer::NONE,
 169                 bool trainable = false,
 170                 TensorLifespan lifespan = TensorLifespan::ITERATION_LIFESPAN,
 171                 bool private_ = true) {
 172     auto prefix_ = private_ ? this->name : this->prefix;
 173     tensors_spec.emplace_back(dim, init, trainable, prefix_ + ":" + name,
 174                               lifespan);
 175     return tensors_spec.size() - 1;
 176   }
 177
 178   /**
 179    * @brief Specification of the tensors
 180    *
 181    */
 182   typedef VarGradSpec TensorSpec;
 183
 184   /**
 185    * @brief Request a new tensor for the layer
 186    *
 187    * @param spec tensor spec, stored as given
 188    * @return unsigned int index of the tensor for its getter
 189    *
 190    * @todo Consider providing a guarantee that the returned indices will always
 191    * start from 0 and will always be incremental.
 192    */
 193   unsigned int requestTensor(const TensorSpec &spec) {
 194     tensors_spec.emplace_back(spec);
 195     return tensors_spec.size() - 1;
 196   }
 197
 198   /**
 199    * @brief Get the current weights spec
 200    *
 201    * @return The current weights spec
 202    */
 203   const std::vector<WeightSpec> &getWeightsSpec() const { return weights_spec; }
 204
 205   /**
 206    * @brief Get the number of requested weights
 207    *
 208    * @return The current number of requested weights
 209    */
 210   unsigned int getNumWeights() const { return weights_spec.size(); }
 211
 212   /**
 213    * @brief Get the current tensors spec
 214    *
 215    * @return The current tensors spec
 216    */
 217   const std::vector<TensorSpec> &getTensorsSpec() const { return tensors_spec; }
 218
 219   /**
 220    * @brief Get the number of requested tensors objects
 221    *
 222    * @return unsigned int number of requested tensors
 223    */
 224   unsigned int getNumTensors() const { return tensors_spec.size(); }
 225
 226   /**
 227    * @brief create var grad specification with output default
 228    *
 229    * @param dim dimension
 230    * @param name name
 231    * @param ls variable lifespan
 232    * @param grad_ls gradient lifespan
 233    * @return VarGradSpecV2 var grad specification
 234    */
 235   static VarGradSpecV2
 236   outSpec(const TensorDim &dim, const std::string &name = "out",
 237           TensorLifespan ls = TensorLifespan::FORWARD_GRAD_LIFESPAN,
 238           TensorLifespan grad_ls = TensorLifespan::BACKWARD_FUNC_LIFESPAN);
 239
 240   /**
 241    * @brief request outputs
 242    *
 243    * @param out_specs pack of out specification, name will be automatically
 244    * indexed to prevent name clash
 245    */
 246   void requestOutputs(std::vector<VarGradSpecV2> &&out_specs);
 247
 248   /**
 249    * @brief Get the Out Specs object
 250    *
 251    * @return std::vector<VarGradSpecV2> out specification
 252    */
 253   const std::vector<VarGradSpecV2> &getOutSpecs() const;
 254
 255   /**
 256    * @brief Validate the context: there must be at least one input dimension,
 257    * no input dimension may have zero data length and the name must be set
 258    * @return true if validated, else false
 259    * @note this must be called before passing a context to a layer for finalize
 260    */
 261   bool validate() {
 262     if (input_dim.empty()) {
 263       return false;
 264     }
 265
 266     for (auto const &dim : input_dim) {
 267       if (dim.getDataLen() == 0) {
 268         return false;
 269       }
 270     }
 271
 272     if (name.empty()) {
 273       return false;
 274     }
 275
 276     return true;
 277   }
 278
 279   /**
 280    * @brief   check if the layer is expected to run in-place
 281    *
 282    * @return true if in-place, else false
 283    */
 284   bool executeInPlace() const { return in_place; }
 285
 286 private:
 287   std::vector<TensorDim> input_dim; /**< Input dimensions for the layer */
 288   bool in_place;             /**< if the layer is expected to run in-place */
 289   float clip_by_global_norm; /**< max norm value for clip by norm */
 290
 291   std::vector<VarGradSpecV2> output_specs; /**< Specification for the output */
 292   std::vector<WeightSpec> weights_spec;    /**< Specification for the weights */
 293   std::vector<TensorSpec>
 294     tensors_spec; /**< Specification for the var_grad (trainable/non-trainable
 295                      variables) */
 296
 297   std::vector<bool> req_out_is_connected;
 298   /**< a bool vector to tell if requested out is actually connected to others */
 299   std::string name;   /**< name of the layer */
 300   std::string prefix; /**< prefix of the layer */
 301 };
 302
 303 /**
 304  * @class   RunLayerContext
 305  * @brief   Class for the layer context used while running a layer
 306  *
 307  * @details This provides for the layer execution. Unlike InitLayerContext,
 308  * which only stores specifications based on which memory will be allocated
 309  * later, this context holds pointers to the weights, inputs, outputs and extra
 310  * tensors of the layer and provides access to them.
 311  *
 312  * @todo Check the caller of the getTensor() and set restrictions on the tensors
 313  * to be accessed based on which function is requesting it.
 314  */
 315 class RunLayerContext {
 316 public:
 317   /**
 318    * @brief Construct a new Run Layer Context object
 319    * @note loss is initialized to 0 and in-place execution is disabled
 320    */
 321   RunLayerContext() : loss(0.0), in_place(false) {}
 322
 323   /**
 324    * @brief Construct a new Run Layer Context object with the given layer name
 325    * and in-place flag
 326    */
 327   RunLayerContext(const std::string &name, bool in_place_) : RunLayerContext() {
 328     in_place = in_place_;
 329     std::get<props::Name>(props).set(name);
 330   }
 331
 332   /**
 333    * @brief Construct a new Run Layer Context object
 334    *
 335    * @param name name of the layer
 336    * @param trainable if the layer is trainable
 337    * @param l loss of the layer
 338    * @param in_place_ execution in-place of the layer
 339    * @param w weights of the layer
 340    * @param in inputs of the layer
 341    * @param out outputs of the layer
 342    * @param t extra tensors of the layer
 343    */
 344   RunLayerContext(const std::string &name, bool trainable, float l,
 345                   bool in_place_, const std::vector<Weight *> &w,
 346                   const std::vector<Var_Grad *> &in,
 347                   const std::vector<Var_Grad *> &out,
 348                   const std::vector<Var_Grad *> &t);
 349
 350   /**
 351    * @brief Get the Weight tensor object
 352    *
 353    * @param idx Identifier of the weight
 354    * @return Tensor& Reference to the weight tensor
 355    */
 356   Tensor &getWeight(unsigned int idx) const;
 357
 358   /**
 359    * @brief Get the Weight Gradient tensor object
 360    *
 361    * @note this method returns the fresh gradient to be filled
 362    * @param idx Identifier of the weight
 363    * @return Tensor& Reference to the weight grad tensor
 364    */
 365   Tensor &getWeightGrad(unsigned int idx) const;
 366
 367   /**
 368    * @brief Get the Weight Optimizer Variable tensor object
 369    *
 370    * @param idx Identifier of the weight
 371    * @param jdx Identifier of the weight optimizer variable
 372    * @return Tensor& Reference to the weight optimizer variable tensor
 373    */
 374   Tensor &getWeightOptVar(unsigned int idx, unsigned int jdx) const;
 375
 376   /**
 377    * @brief Get the Weight name
 378    *
 379    * @param idx Identifier of the weight
 380    * @return name of the weight
 381    */
 382   const std::string &getWeightName(unsigned int idx) const;
 383
 384   /**
 385    * @brief check if the weight has gradient
 386    *
 387    * @param idx Identifier of the weight
 388    * @return true if weight has gradient, else false
 389    */
 390   bool weightHasGradient(unsigned int idx) const;
 391
 392   /**
 393    * @brief Get the Output tensor object
 394    *
 395    * @param idx Identifier of the output
 396    * @return Tensor& Reference to the output tensor
 397    */
 398   Tensor &getOutput(unsigned int idx);
 399
 400   /**
 401    * @brief Get the Output tensor object
 402    *
 403    * @param idx Identifier of the output
 404    * @return Tensor& Reference to the output tensor
 405    */
 406   const Tensor &getOutput(unsigned int idx) const;
 407
 408   /**
 409    * @brief Get the Output Grad tensor object
 410    *
 411    * @param idx Identifier of the output
 412    * @return Read-only output grad tensor, if derivative does not have
 413    * gradient, return a temporary, initialized to zero
 414    */
 415   const Tensor getOutputGrad(unsigned int idx) const;
 416
 417   /**
 418    * @brief Get the Output Grad tensor object
 419    *
 420    * @param idx Identifier of the output
 421    * @return Tensor& Reference to the output grad tensor, this is valid only if
 422    * the given output is trainable
 423    *
 424    * @note recommended to NOT use this function as a layer developer but rather
 425    * use getOutputGrad().
 426    */
 427   Tensor &getOutputGradUnsafe(unsigned int idx);
 428
 429   /**
 430    * @brief check if the output has gradient
 431    *
 432    * @param idx Identifier of the output
 433    * @return true if output has gradient, else false
 434    */
 435   bool outputHasGradient(unsigned int idx) const;
 436
 437   /**
 438    * @brief Get the incoming Derivative tensor object
 439    *
 440    * @param idx Identifier of the output
 441    * @return Tensor output derivative tensor, if derivative does not have
 442    * gradient, return a temporary, initialized to zero
 443    */
 444   const Tensor getIncomingDerivative(unsigned int idx) const;
 445
 446   /**
 447    * @brief Get the Input tensor object
 448    *
 449    * @param idx Identifier of the input
 450    * @return Tensor& Reference to the input tensor
 451    */
 452   Tensor &getInput(unsigned int idx);
 453
 454   /**
 455    * @brief Get the Input tensor object
 456    *
 457    * @param idx Identifier of the input
 458    * @return Tensor& Reference to the input tensor
 459    */
 460   const Tensor &getInput(unsigned int idx) const;
 461
 462   /**
 463    * @brief Get the Input Grad tensor object
 464    *
 465    * @param idx Identifier of the input
 466    * @return Tensor& Reference to the input grad tensor
 467    */
 468   Tensor &getInputGrad(unsigned int idx);
 469
 470   /**
 471    * @brief check if the input has gradient
 472    *
 473    * @param idx Identifier of the input
 474    * @return true if input has gradient, else false
 475    */
 476   bool inputHasGradient(unsigned int idx) const;
 477
 478   /**
 479    * @brief Get the outgoing Derivative tensor object
 480    *
 481    * @param idx Identifier of the input
 482    * @return Tensor& Reference to the input derivative tensor
 483    */
 484   Tensor &getOutgoingDerivative(unsigned int idx);
 485
 486   /**
 487    * @brief Get the Tensor object
 488    *
 489    * @param idx Identifier of the tensor
 490    * @return Tensor& Reference to the tensor
 491    */
 492   Tensor &getTensor(unsigned int idx);
 493
 494   /**
 495    * @brief Get the Tensor object
 496    *
 497    * @param idx Identifier of the tensor
 498    * @return Tensor& Reference to the tensor
 499    */
 500   const Tensor &getTensor(unsigned int idx) const;
 501
 502   /**
 503    * @brief Get the Tensor Grad object
 504    *
 505    * @param idx Identifier of the tensor
 506    * @return Tensor& Reference to the tensor grad tensor
 507    */
 508   Tensor &getTensorGrad(unsigned int idx);
 509
 510   /**
 511    * @brief Get the Tensor Grad object
 512    *
 513    * @param idx Identifier of the tensor
 514    * @return Tensor& Reference to the tensor grad tensor
 515    */
 516   const Tensor &getTensorGrad(unsigned int idx) const;
 517
 518   /**
 519    * @brief check if the tensor has gradient
 520    *
 521    * @param idx Identifier of the tensor
 522    * @return true if tensor has gradient, else false
 523    */
 524   bool tensorHasGradient(unsigned int idx) const;
 525
 526   /**
 527    * @brief check if the weight is borrowed from others so it is dependent
 528    *
 529    * @param idx index of the weight
 530    * @return bool true if weight is borrowed from outside
 531    */
 532   bool isWeightDependent(unsigned int idx) const;
 533
 534   /**
 535    * @brief check if the current gradient access is the first access
 536    * @note for now, it is presumably equivalent to weight first access (TODO
 537    * confirm), so this value is accessible for non-trainable weights as well.
 538    * This is in terms of execution order.
 539    *
 540    * @param idx index
 541    * @return bool true if first access
 542    */
 543   bool isGradientFirstAccess(unsigned int idx) const;
 544
 545   /**
 546    * @brief check if the current gradient access is the last access
 547    * @note for now, it is equivalent to weight last access, so this value is
 548    * accessible for non-trainable weights as well. This is in terms of execution
 549    * order.
 550    *
 551    * @param idx index
 552    * @return bool true if last access
 553    */
 554   bool isGradientLastAccess(unsigned int idx) const;
 555
 556   /**
 557    * @brief check if the gradient is to be clipped by global norm
 558    *
 559    * @param idx index
 560    * @return bool true if it is to be clipped else false
 561    */
 562   bool isGradientClipByGlobalNorm(unsigned int idx) const;
 563
 564   /**
 565    * @brief Get the tensor name
 566    *
 567    * @param idx Identifier of the tensor
 568    * @return name of the tensor
 569    */
 570   const std::string &getTensorName(unsigned int idx) const;
 571
 572   /**
 573    * @brief Get the number of Outputs tensor objects
 574    *
 575    * @return unsigned int number of output tensors
 576    */
 577   unsigned int getNumOutputs() const { return outputs.size(); }
 578
 579   /**
 580    * @brief Get the number of inputs tensor objects
 581    *
 582    * @return unsigned int number of input tensors
 583    */
 584   unsigned int getNumInputs() const { return inputs.size(); }
 585
 586   /**
 587    * @brief Get the number of weights tensor objects
 588    *
 589    * @return unsigned int number of weight tensors
 590    */
 591   unsigned int getNumWeights() const { return weights.size(); }
 592
 593   /**
 594    * @brief Get the Number of Weight Optimizer Variable tensor object
 595    *
 596    * @param idx Identifier of the weight
 597    * @return unsigned int Number of the weight optimizer variable
 598    */
 599   unsigned int getNumWeightOptVar(unsigned int idx) const;
 600
 601   /**
 602    * @brief Get the number of requested tensors objects
 603    *
 604    * @return unsigned int number of requested tensors
 605    */
 606   unsigned int getNumTensors() const { return tensors.size(); }
 607
 608   /**
 609    * @brief Set the batch for the run context
 610    *
 611    * @param batch Update batch size
 612    */
 613   void setBatch(unsigned int batch);
 614
 615   /**
 616    * @brief Update the dimensions for a requested tensor
 617    *
 618    * @param idx index of the tensor (identifier)
 619    * @param batch Updated batch size
 620    */
 621   void updateTensor(unsigned int idx, unsigned int batch);
 622
 623   /**
 624    * @brief   Get weight object for the weights
 625    *
 626    * @param idx index of the weight (identifier)
 627    * @return weight object
 628    */
 629   Weight &getWeightObject(unsigned int idx);
 630
 631   /**
 632    * @brief   check if the label is available
 633    *
 634    * @param idx Identifier of the input
 635    * @return true if label is available else false
 636    */
 637   bool isLabelAvailable(unsigned int idx) const;
 638
 639   /**
 640    * @brief   Get label tensor
 641    *
 642    * @param idx Identifier of the input
 643    * @return Tensor& Reference to the label tensor
 644    */
 645   Tensor &getLabel(unsigned int idx);
 646
 647   /**
 648    * @brief   update loss by the layer
 649    *
 650    * @param val updated loss value
 651    * @note loss value is only used for loss layers. For non-loss layers, setting
 652    * this value will have no change on the behavior of the model.
 653    */
 654   void setLoss(float val) { loss = val; }
 655
 656   /**
 657    * @brief   get loss of the layer
 658    *
 659    * @return loss of the layer
 660    * @note does not include the regularization loss.
 661    */
 662   float getLoss() const { return loss; }
 663
 664   /**
 665    * @brief   get regularization loss of the layer
 666    *
 667    * @return regularization loss of the layer, summed over all of its weights
 668    */
 669   float getRegularizationLoss() const {
 670     float loss_ = 0;
 671     for (unsigned int idx = 0; idx < getNumWeights(); idx++) {
 672       loss_ += getWeightRegularizationLoss(idx);
 673     }
 674     return loss_;
 675   }
 676
 677   /**
 678    * @brief   get name by the layer
 679    *
 680    * @return name of the layer
 681    */
 682   const std::string &getName() const { return std::get<props::Name>(props); }
 683
 684   /**
 685    * @brief   get trainable by the layer
 686    *
 687    * @return trainable of the layer
 688    */
 689   bool getTrainable() const { return std::get<props::Trainable>(props); }
 690
 691   /**
 692    * @brief   check if run context is set and is ready to use
 693    *
 694    * @return true if ready, else false
 695    */
 696   bool readyToUse() const;
 697
 698   /**
 699    * @brief   validates the run context after run
 700    *
 701    * @param skip_input  skip verifying the input
 702    * @param skip_label  skip verifying the label
 703    *
 704    * @return true if valid, else false
 705    */
 706   bool validate(bool skip_input = false, bool skip_label = false);
 707
 708   /**
 709    * @brief   check if the layer is expected to run in-place
 710    *
 711    * @return true if in-place, else false
 712    */
 713   bool executeInPlace() const { return in_place; }
 714
 715 private:
 716   std::tuple<props::Name, props::Trainable> props; /**< props of the layer */
 717   float loss;                                      /**< loss of the layer */
 718   bool in_place; /**< if the layer is expected to run in-place */
 719
 720   std::vector<Weight *> weights;   /**< weights of the layer */
 721   std::vector<Var_Grad *> inputs;  /**< inputs of the layer */
 722   std::vector<Var_Grad *> outputs; /**< outputs of the layer */
 723   std::vector<Var_Grad *> tensors; /**< tensors of the layer */
 724
 725 #ifdef DEBUG
 726   std::map<std::string, const void *>
 727     tensor_map; /**< map of tensor name to tensor address */
 728 #endif
 729
 730   /**
 731    * @brief Get regularization loss for the weight
 732    *
 733    * @param idx Identifier of the weight
 734    * @return float Value of the loss
 735    */
 736   float getWeightRegularizationLoss(unsigned int idx) const;
 737 };
 738
 739 } // namespace nntrainer
 740 #endif // __LAYER_CONTEXT_H__