[ Weight ] split variable dim and grad dim to set them separately
author jijoong.moon <jijoong.moon@samsung.com>
Fri, 26 Apr 2024 10:13:05 +0000 (19:13 +0900)
committer MyungJoo Ham <myungjoo.ham@samsung.com>
Sat, 4 May 2024 05:20:40 +0000 (14:20 +0900)
This PR splits the variable and gradient dimensions in Var_Grad and Weight.
This way, the variable and the gradient of a Weight can have different
tensor types (see the sketch below).
. add dim_g for the gradient in WeightSpec
. update Manager to support the new WeightSpec
. create tensors according to dim_v and dim_g
. change Weight creation in weight.h
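
Below is a minimal usage sketch of the split-dimension Weight constructor
added by this patch. The shapes and the FP16/FP32 data-type enumerators are
illustrative assumptions, not part of the patch:

  // variable kept in FP16 while its gradient is requested in FP32
  TensorDim dim_v(1, 1, 64, 64);
  dim_v.setDataType(TensorDim::DataType::FP16);

  TensorDim dim_g(dim_v); // same shape as the variable
  dim_g.setDataType(TensorDim::DataType::FP32);

  // new constructor: variable and gradient dimensions passed separately
  Weight w(dim_v, dim_g, Tensor::Initializer::XAVIER_UNIFORM,
           WeightRegularizer::NONE, 1.0f, 0.0f, 0.0f, true, false, "w");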

Resolves:

**Self evaluation:**
1. Build test:  [X]Passed [ ]Failed [ ]Skipped
2. Run test:  [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
nntrainer/graph/network_graph.cpp
nntrainer/layers/layer_context.h
nntrainer/tensor/manager.cpp
nntrainer/tensor/tensor_wrap_specs.h
nntrainer/tensor/var_grad.cpp
nntrainer/tensor/var_grad.h
nntrainer/tensor/weight.cpp
nntrainer/tensor/weight.h
nntrainer/utils/node_exporter.cpp
nntrainer/utils/node_exporter.h

index 68f5dc6c72886324dd12cd28df4429fff68722d8..2d4cfdc76954c29369b96e743e5e11e7875f5572 100644 (file)
@@ -869,7 +869,7 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
 
     const auto &w_specs = init_context.getWeightsSpec();
     for (auto i = 0u; i < w_specs.size(); ++i) {
-      shared_weight_names.emplace_back(std::get<7>(w_specs.at(i)));
+      shared_weight_names.emplace_back(std::get<8>(w_specs.at(i)));
     }
   }
 
@@ -1018,7 +1018,7 @@ NetworkGraph::refinalizeContext(const std::shared_ptr<LayerNode> &lnode,
 
     const auto &w_specs = init_context.getWeightsSpec();
     for (auto i = 0u; i < w_specs.size(); ++i) {
-      shared_weight_names.emplace_back(std::get<7>(w_specs.at(i)));
+      shared_weight_names.emplace_back(std::get<8>(w_specs.at(i)));
     }
   }
 
index cd37153f50ccea1b3007fa4ee1f8988f527401ba..fe3938f75356c5cc19ec5ab66791d087f0c9475c 100644 (file)
@@ -172,7 +172,8 @@ public:
   /**
    * @brief Request a new weight for the layer
    *
-   * @param dim dimension of the weight
+   * @param dim_v dimension of the variable of the weight
+   * @param dim_g dimension of the gradient of the weight
    * @param init initializer for the weight
    * @param reg regularizer for the weight
    * @param reg_const regularization constant for the weight
@@ -188,7 +189,14 @@ public:
                              const WeightRegularizer reg, const float reg_const,
                              const float decay, const std::string &name,
                              bool trainable = true, unsigned int out_axis = 3) {
-    weights_spec.emplace_back(dim, init, reg, reg_const, decay,
+
+    /** @note We assume the gradient data type is the same as the
+     * activation data type. */
+    TensorDim dim_g(dim);
+
+    dim_g.setDataType(getActivationDataType());
+
+    weights_spec.emplace_back(dim, dim_g, init, reg, reg_const, decay,
                               clip_by_global_norm, trainable,
                               prefix + ":" + name, out_axis, loss_scale);
     return weights_spec.size() - 1;
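
In effect, the gradient dimension derived above always shares the variable's
shape and differs only in data type. A hedged illustration (the FP16/FP32
enumerators are assumed; getActivationDataType() is from the hunk above):

  // weight variable declared as FP16, activations running in FP32
  TensorDim dim(1, 1, 8, 8);
  dim.setDataType(TensorDim::DataType::FP16);

  TensorDim dim_g(dim);                         // copies the shape
  dim_g.setDataType(TensorDim::DataType::FP32); // i.e. getActivationDataType()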
index 4178330ebdc8e14aba9ca59f46437699d9a16287..9a0d235ba97fabe81dd5b2ac3ad911a107956987 100644 (file)
 
 namespace nntrainer {
 MMapedMemory::MMapedMemory(size_t size, bool allocate_fd_) :
-  fd(-1),
-  buf(nullptr),
-  buf_size(0),
-  allocate_fd(allocate_fd_) {
+  fd(-1), buf(nullptr), buf_size(0), allocate_fd(allocate_fd_) {
 
 #ifndef __ANDROID__
   if (allocate_fd) {
@@ -386,8 +383,9 @@ std::vector<Weight *> Manager::requestWeights(
   size_t current_size = weights_v2.size();
 
   for (unsigned int i = 0; i < weights_spec.size(); ++i) {
-    auto &[dim, t_initializer, w_reg, w_reg_const, decay, clip_by_global_norm,
-           need_gradient, name, axis] = weights_spec.at(i);
+    auto &[dim_v, dim_g, t_initializer, w_reg, w_reg_const, decay,
+           clip_by_global_norm, need_gradient, name, axis, loss_scale] =
+      weights_spec.at(i);
 
     std::vector<unsigned int> var_exec_order;
     for (auto order : default_var_exec_order) {
@@ -422,7 +420,7 @@ std::vector<Weight *> Manager::requestWeights(
      /// shared_name is used and the original name is discarded
       const auto &shared_name = shared_names.at(i);
       /** case when shared names are given */
-      var = weight_pool.requestOrExtend(shared_name, dim, var_exec_order,
+      var = weight_pool.requestOrExtend(shared_name, dim_v, var_exec_order,
                                         var_ls, t_initializer);
 
       if (trainable && need_gradient) {
@@ -431,13 +429,13 @@ std::vector<Weight *> Manager::requestWeights(
         * for each layer anymore and it is hard to overwrite.
          */
         grad = tensor_pool.requestOrExtend(shared_name + Var_Grad::grad_suffix,
-                                           dim, grad_exec_order, grad_ls,
+                                           dim_g, grad_exec_order, grad_ls,
                                            Tensor::Initializer::ZEROS);
       }
     } else {
       /** case requesting fresh weights */
       var =
-        weight_pool.request(name, dim, var_exec_order, var_ls, t_initializer);
+        weight_pool.request(name, dim_v, var_exec_order, var_ls, t_initializer);
 
       if (trainable && need_gradient) {
         /** is_wgrad is the index which is true when it is the gradient tensor
@@ -447,7 +445,7 @@ std::vector<Weight *> Manager::requestWeights(
         bool is_wgrad = true;
         if (Weight::isGradientClipByGlobalNorm(clip_by_global_norm))
           is_wgrad = false;
-        grad = tensor_pool.request(name + Var_Grad::grad_suffix, dim,
+        grad = tensor_pool.request(name + Var_Grad::grad_suffix, dim_g,
                                    grad_exec_order, grad_ls,
                                    Tensor::Initializer::ZEROS, is_wgrad);
       }
index aab767d14f5a1db33af4c24dbf669c90511a291b..6a5195fef51697b0545e020cb4e1020d466f5e2e 100644 (file)
@@ -75,8 +75,9 @@ enum class TensorLifespan {
  * regularizer_constant, decay, clip gradient constant, need_gradient property,
 * name, output axis of the tensor object and loss scale factor.
  */
-typedef std::tuple<TensorDim, Tensor::Initializer, WeightRegularizer, float,
-                   float, float, bool, const std::string, unsigned int, float>
+typedef std::tuple<TensorDim, TensorDim, Tensor::Initializer, WeightRegularizer,
+                   float, float, float, bool, const std::string, unsigned int,
+                   float>
   WeightSpec;
 
 /**
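
For reference, a hedged sketch of consuming the widened 11-element
WeightSpec, mirroring the structured binding used in manager.cpp above
(TensorDim's stream operator is assumed):

  #include <iostream>

  void dumpSpec(const nntrainer::WeightSpec &spec) {
    const auto &[dim_v, dim_g, init, reg, reg_const, decay,
                 clip_by_global_norm, need_gradient, name, axis,
                 loss_scale] = spec;
    std::cout << name << ": var " << dim_v << " / grad " << dim_g << '\n';
  }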
index 5fc5d8930dc3050fb29ce017cd2354850e6db1f8..09dbf6267e47bd0af3f4a0f536526fafedbd31c6 100644 (file)
@@ -38,6 +38,27 @@ Var_Grad::Var_Grad(const TensorDim &dim, const Tensor::Initializer init,
     grad = std::make_shared<Tensor>(grad_name);
 }
 
+Var_Grad::Var_Grad(const TensorDim &dim_v, const TensorDim &dim_g,
+                   const Tensor::Initializer init, bool need_gradient,
+                   bool alloc_now, const std::string &name) :
+  is_dependent(false),
+  is_first_access_gradient(false),
+  is_last_access_gradient(false) {
+  var = std::make_shared<Tensor>(dim_v, alloc_now, init, name);
+
+  std::string grad_name = name + grad_suffix;
+  if (need_gradient)
+    /**
+     * @todo the gradient initializer should be NONE, and the user should
+     * set it to zero right before use.
+     */
+
+    grad = std::make_shared<Tensor>(dim_g, alloc_now,
+                                    Tensor::Initializer::ZEROS, grad_name);
+  else
+    grad = std::make_shared<Tensor>(grad_name);
+}
+
 void Var_Grad::initializeVariable(const Tensor &preallocated) {
   /**
    * Making a new tensor is intentional here as this tensor is not shared
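
A hedged example of the new Var_Grad constructor, pairing an FP16 variable
with an FP32 gradient (the data-type enumerators are assumed):

  TensorDim dim_v(1, 1, 4, 4);
  dim_v.setDataType(TensorDim::DataType::FP16);
  TensorDim dim_g(dim_v);
  dim_g.setDataType(TensorDim::DataType::FP32);

  // the gradient tensor is created from dim_g and zero-initialized
  Var_Grad vg(dim_v, dim_g, Tensor::Initializer::XAVIER_UNIFORM,
              /*need_gradient=*/true, /*alloc_now=*/false, "fc_w");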
index dfe1b9a0b3151a79ab175e2ff05791795441d1cb..52cabbc055d0a25fc0873d43d35eb9475b30affb 100644 (file)
@@ -59,6 +59,20 @@ public:
                     bool ng = true, bool alloc_now = false,
                     const std::string &name = "");
 
+  /**
+   * @brief Construct a new Var_Grad object
+   *
+   * @param dim_v Variable tensor dimension
+   * @param dim_g Gradient tensor dimension
+   * @param ng Whether the variable needs a gradient
+   * @param alloc_now Whether to allocate the tensors' memory upon init
+   * @param name Name for this Var_Grad
+   */
+  explicit Var_Grad(const TensorDim &dim_v, const TensorDim &dim_g,
+                    const Tensor::Initializer init = Tensor::Initializer::NONE,
+                    bool ng = true, bool alloc_now = false,
+                    const std::string &name = "");
+
   /**
    * @brief Construct a new Var_Grad object
    *
index 93c4ee5fb4d354d979ba2e5414612a8c0af851bf..f98c8c83565ef1e82de7c3cac8de2284bf9ee691 100644 (file)
@@ -29,8 +29,25 @@ Weight::Weight(const TensorDim &dim, const Tensor::Initializer init,
   decay(decay_const),
   clip_by_global_norm(max_norm),
   output_axis(axis),
-  loss_scale(loss_scale_);
-{
+  loss_scale(loss_scale_) {
+  if (init == Tensor::Initializer::NONE)
+    throw std::invalid_argument("Weight initializer cannot be none");
+  if (regularizer == WeightRegularizer::UNKNOWN)
+    throw std::invalid_argument("Weight regularizer unknown");
+}
+
+Weight::Weight(const TensorDim &dim_v, const TensorDim &dim_g,
+               const Tensor::Initializer init, const WeightRegularizer reg,
+               const float reg_const, const float decay_const,
+               const float max_norm, bool train, bool alloc_now_,
+               std::string name, unsigned int axis, float loss_scale_) :
+  Var_Grad(dim_v, dim_g, init, train, alloc_now_, name),
+  regularizer(reg),
+  regularizer_constant(reg_const),
+  decay(decay_const),
+  clip_by_global_norm(max_norm),
+  output_axis(axis),
+  loss_scale(loss_scale_) {
   if (init == Tensor::Initializer::NONE)
     throw std::invalid_argument("Weight initializer cannot be none");
   if (regularizer == WeightRegularizer::UNKNOWN)
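
The new constructor keeps the same validation as the single-dimension one.
A hedged sketch of the failure mode, reusing dim_v/dim_g from the sketches
above:

  try {
    // passing Initializer::NONE explicitly trips the check above
    Weight bad(dim_v, dim_g, Tensor::Initializer::NONE);
  } catch (const std::invalid_argument &e) {
    std::cerr << e.what() << '\n'; // "Weight initializer cannot be none"
  }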
index 2be6460caa0d56e7e1db29c205356b46940064ab..552f6d573934358ff9072846b4206e7034e924b7 100644 (file)
@@ -68,23 +68,45 @@ public:
     bool alloc_now = false, std::string name = "", unsigned int axis = 3,
     float loss_scale_ = 0.0);
 
+  /**
+   * @brief Construct a new Weight object
+   *
+   * @param dim_v Variable tensor dimension
+   * @param dim_g Gradient tensor dimension
+   * @param init Initializer for the weight
+   * @param reg Regularizer for the weight
+   * @param reg_const Constant multiplier for regularizer
+   * @param ng Whether the weight needs a gradient
+   * @param alloc_now Whether to allocate the weight tensors' memory upon init
+   * @param name Name for this weight
+   */
+  explicit Weight(
+    const TensorDim &dim_v, const TensorDim &dim_g,
+    const Tensor::Initializer init = Tensor::Initializer::XAVIER_UNIFORM,
+    const WeightRegularizer reg = WeightRegularizer::NONE,
+    const float reg_const = 1.0f, const float decay = 0.0f,
+    const float clip_by_global_norm = 0.0f, bool ng = true,
+    bool alloc_now = false, std::string name = "", unsigned int axis = 3,
+    float loss_scale_ = 0.0);
+
   /**
    * @brief Construct a new Weight object
    *
    * @param spec Weight specification
    */
   explicit Weight(const Spec &spec, bool alloc_now = false) :
-    Weight(std::get<0>(spec), // TensorDim
-           std::get<1>(spec), // Tensor::Initializer
-           std::get<2>(spec), // WeightRegularizer
-           std::get<3>(spec), // WeightRegularizerConstant
-           std::get<4>(spec), // weight decay constant
-           std::get<5>(spec), // MaxNorm for clipping
-           std::get<6>(spec), // need_gradient
+    Weight(std::get<0>(spec), // TensorDim for Variable
+           std::get<1>(spec), // TensorDim for Gradient
+           std::get<2>(spec), // Tensor::Initializer
+           std::get<3>(spec), // WeightRegularizer
+           std::get<4>(spec), // WeightRegularizerConstant
+           std::get<5>(spec), // weight decay constant
+           std::get<6>(spec), // MaxNorm for clipping
+           std::get<7>(spec), // need_gradient
            alloc_now,
-           std::get<7>(spec), // Name
-           std::get<8>(spec), // out axis
-           std::get<9>(spec)  // loss scale
+           std::get<8>(spec), // Name
+           std::get<9>(spec), // out axis
+           std::get<10>(spec) // loss scale
     ) {}
 
   /**
@@ -122,13 +144,14 @@ public:
   explicit Weight(Tensor *v, Tensor *g, const WeightRegularizer reg,
                   const float reg_const, const float decay,
                   bool is_dependent = false, const float max_norm = 0.0f,
-                  unsigned int output_axis_ = 3) :
+                  unsigned int output_axis_ = 3, float loss_scale_ = 0.0f) :
     Var_Grad(v, g, is_dependent),
     regularizer(reg),
     regularizer_constant(reg_const),
     decay(decay),
     clip_by_global_norm(max_norm),
-    output_axis(output_axis_) {}
+    output_axis(output_axis_),
+    loss_scale(loss_scale_) {}
 
   /**
    * @brief Swap for weight
@@ -315,6 +338,7 @@ private:
   unsigned int output_axis;
   float loss_scale;
   std::vector<Tensor *> opt_vars; /**< optimizer variables */
+  std::shared_ptr<Tensor> var32; /**< presumably an FP32 master copy of the variable, for mixed precision */
 
   /**
    * @brief     Apply the weight decay to the weight
index eabf9234f1d3c3f931f19c71ccf7edd38e465f28..031d2c2fbfb4be4b3ba69f8759d9759d58b42e27 100644 (file)
@@ -91,7 +91,8 @@ void Exporter::saveTflResult(
   const std::tuple<props::Name, props::Distribute, props::Trainable,
                    std::vector<props::InputConnection>,
                    std::vector<props::InputShape>, props::SharedFrom,
-                   props::ClipGradByGlobalNorm, props::Packed> &props,
+                   props::ClipGradByGlobalNorm, props::Packed,
+                   props::LossScaleForMixed> &props,
   const LayerNode *self) {
   createIfNull(tf_node);
   tf_node->setLayerNode(*self);
index 84c38894f14e1c2f47400dc51b3abfa2e73044e5..de29cf77d934196e196696eb710105aa716b50d0 100644 (file)
@@ -234,6 +234,7 @@ class DisableBias;
 class Activation;
 class BatchNormalization;
 class Packed;
+class LossScaleForMixed;
 } // namespace props
 
 class LayerNode;
@@ -243,11 +244,11 @@ class LayerNode;
  */
 template <>
 void Exporter::saveTflResult(
-
   const std::tuple<props::Name, props::Distribute, props::Trainable,
                    std::vector<props::InputConnection>,
                    std::vector<props::InputShape>, props::SharedFrom,
-                   props::ClipGradByGlobalNorm, props::Packed> &props,
+                   props::ClipGradByGlobalNorm, props::Packed,
+                   props::LossScaleForMixed> &props,
   const LayerNode *self);
 
 class BatchNormalizationLayer;