[weight] Weight cleanup related to initializer

author Parichay Kapoor <pk.kapoor@samsung.com>

Tue, 27 Jul 2021 06:47:48 +0000 (15:47 +0900)

committer Jijoong Moon <jijoong.moon@samsung.com>

Tue, 10 Aug 2021 22:46:58 +0000 (07:46 +0900)
author Parichay Kapoor <pk.kapoor@samsung.com>
Tue, 27 Jul 2021 06:47:48 +0000 (15:47 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Tue, 10 Aug 2021 22:46:58 +0000 (07:46 +0900)
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp

index bafbdc2471cc06624527718e76fc2f87a1f8e9e7..06f8e6f563e430ab6818d158a66c5cd56348b6d5 100644 (file)
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -153,10 +153,12 @@ void Tensor::allocate() {
      data = std::shared_ptr<float>(src_tensor->tensor()->data,
                                    src_tensor->tensor()->data.get() +
                                      src_tensor->offset());
+    /** as this memory is shared, do NOT initialize */
    } else {
      /// allocate new memory for the tensor data
      data = std::shared_ptr<float>(new float[dim.getDataLen()],
                                    std::default_delete<float[]>());
+    initialize();
    }
  }
  
@@ -541,9 +543,11 @@ void Tensor::createSharedDataTensor(const Tensor &src, Tensor &dest,
     * @note src.data and src.src_tensor CAN co-exist. src.src_tensor is stored
     * if the batch size of src is updated and needs reallocation.
     */
-  if (src.data)
-    dest.data = std::shared_ptr<float>(src.data, src.data.get() + offset);
-  else if (!src.src_tensor)
+  dest.data = nullptr;
+  if (src.data) {
+    dest.src_tensor = std::make_shared<SrcSharedTensor>(&src, offset);
+    dest.allocate();
+  } else if (!src.src_tensor)
      dest.src_tensor = std::make_shared<SrcSharedTensor>(&src, offset);
    else
      dest.src_tensor = std::make_shared<SrcSharedTensor>(
diff --git a/nntrainer/tensor/var_grad.cpp b/nntrainer/tensor/var_grad.cpp

index ab5a09cd9b41fc93abca52dcaa4cfed5faffea90..8eb2435d62ca1590c9fee5c4c89df0cd5e7bb6b6 100644 (file)
--- a/nntrainer/tensor/var_grad.cpp
+++ b/nntrainer/tensor/var_grad.cpp
@@ -26,6 +26,10 @@ Var_Grad::Var_Grad(const TensorDim &dim, const Tensor::Initializer init,
    name(name) {
    var = std::make_shared<Tensor>(dim, alloc_now, init);
    if (need_gradient)
+    /**
+     * @todo gradient initializer should be NONE; the user should then set the
+     * gradient to zero right before using it.
+     */
      grad = std::make_shared<Tensor>(dim, alloc_now, Tensor::Initializer::ZEROS);
    else
      grad = std::make_shared<Tensor>();
@@ -34,6 +38,7 @@ Var_Grad::Var_Grad(const TensorDim &dim, const Tensor::Initializer init,
  void Var_Grad::initializeVariable(const Tensor &preallocated) {
    if (!preallocated.empty()) {
      var->makeSharedDataTensor(preallocated);
+    /** tensor memory is intentionally left uninitialized for shared tensors */
    }
  }
  
@@ -44,6 +49,7 @@ void Var_Grad::initializeGradient(const Tensor &preallocated) {
       * with other layers but the internal memory is.
       */
      grad->makeSharedDataTensor(preallocated);
+    /** tensor memory is intentionally left uninitialized for shared tensors */
    }
    /**
     * No need to reset gradient here. With shared memory, each gradient setting
@@ -58,7 +64,8 @@ void Var_Grad::needsGradient(bool ng) {
    need_gradient = ng;
    if (need_gradient && grad->empty()) {
      bool alloc_now_ = var->isAllocated();
-    grad = std::make_shared<Tensor>(var->getDim(), alloc_now_);
+    grad =
+      std::make_shared<Tensor>(dim, alloc_now_, Tensor::Initializer::ZEROS);
    }
  }
  
diff --git a/nntrainer/tensor/var_grad.h b/nntrainer/tensor/var_grad.h

index 0f003f16cc0f40565c14c22a825d8bbf80146c34..d3700b6d7e6fd2aab5b1916d4354dc315503b957 100644 (file)
--- a/nntrainer/tensor/var_grad.h
+++ b/nntrainer/tensor/var_grad.h
@@ -234,10 +234,12 @@ public:
     *
     * @note New dimension must maintain the shape of the variable
     */
-  void reset(const TensorDim &tdim, bool ng) {
+  void reset(const TensorDim &tdim, Tensor::Initializer init, bool ng) {
      dim = tdim;
      if (!var->empty())
        var->reshape(dim);
+    var->initialize(init);
+
      if (!grad->empty())
        grad->reshape(dim);
      need_gradient = ng;
diff --git a/nntrainer/tensor/weight.cpp b/nntrainer/tensor/weight.cpp

index a7de41ff2c32aa61ed884a7aacd3a0537268f3fd..50db0ca5adeb117fbfd6777f4d3328ac3b0e6bfd 100644 (file)
--- a/nntrainer/tensor/weight.cpp
+++ b/nntrainer/tensor/weight.cpp
@@ -22,79 +22,14 @@ Weight::Weight(const TensorDim &dim, const Tensor::Initializer init,
                 const WeightRegularizer reg, const float reg_const, bool train,
                 bool alloc_now_, std::string name) :
    Var_Grad(dim, init, train, alloc_now_, name),
-  initializer(init),
    regularizer(reg),
    regularizer_constant(reg_const) {
-  if (initializer == Tensor::Initializer::NONE)
-    throw std::invalid_argument("Weight initializer unknown");
+  if (init == Tensor::Initializer::NONE)
+    throw std::invalid_argument("Weight initializer cannot be none");
    if (regularizer == WeightRegularizer::UNKNOWN)
      throw std::invalid_argument("Weight regularizer unknown");
  }
  
-void Weight::initializeVariable(const Tensor &preallocated) {
-  Var_Grad::initializeVariable(preallocated);
-
-  if (alloc_now)
-    runVariableInitializer();
-}
-
-void Weight::runVariableInitializer() {
-  Tensor &var_ref = getVariableRef();
-  const TensorDim dim = var_ref.getDim();
-
-  unsigned int fan_in, fan_out;
-
-  /// @fixme: when unit is equal to one, this does not work, we need to rely on
-  /// effective dimension then actual numbers here. For now, some heuristics
-  /// added to infer what would be fan_in/fan_out
-  if (dim.batch() * dim.channel() * dim.height() == 1) {
-    fan_out = fan_in = dim.width();
-  } else if (dim.batch() * dim.channel() == 1) { /// fully connected layers
-    fan_in = dim.height();
-    fan_out = dim.width();
-  } else { /// convolution filters, @todo extend this to > 4
-    auto field_size = dim.height() * dim.width();
-
-    // this also handles below cases.
-    // 1. fan_in = fan_out = 1 as well.
-    // 2. batch == 1, channel == 1 and height == 1, theoretical rank of 1
-    fan_in = dim.channel() * field_size;
-    fan_out = dim.batch() * field_size;
-  }
-
-  switch (initializer) {
-  case Tensor::Initializer::ZEROS:
-    var_ref.setZero();
-    break;
-  case Tensor::Initializer::ONES:
-    var_ref.setValue(1.0f);
-    break;
-  case Tensor::Initializer::LECUN_NORMAL:
-    var_ref.setRandNormal(0.0f, sqrtFloat(1.0f / fan_in));
-    break;
-  case Tensor::Initializer::XAVIER_NORMAL:
-    var_ref.setRandNormal(0.0f, sqrtFloat(2.0f / (fan_in + fan_out)));
-    break;
-  case Tensor::Initializer::HE_NORMAL:
-    var_ref.setRandNormal(0.0f, sqrtFloat(2.0f / (fan_in)));
-    break;
-  case Tensor::Initializer::LECUN_UNIFORM:
-    var_ref.setRandUniform(-1.0f * sqrtFloat(1.0f / fan_in),
-                           sqrtFloat(1.0f / fan_in));
-    break;
-  case Tensor::Initializer::XAVIER_UNIFORM:
-    var_ref.setRandUniform(-1.0f * sqrtFloat(6.0f / (fan_in + fan_out)),
-                           sqrtFloat(6.0 / (fan_in + fan_out)));
-    break;
-  case Tensor::Initializer::HE_UNIFORM:
-    var_ref.setRandUniform(-1.0f * sqrtFloat(6.0f / (fan_in)),
-                           sqrtFloat(6.0 / (fan_in)));
-    break;
-  default:
-    break;
-  }
-}
-
  void Weight::initializeGradient(const Tensor &preallocated) {
    // Use self variable to initialize itself
    Var_Grad::initializeGradient(preallocated);
diff --git a/nntrainer/tensor/weight.h b/nntrainer/tensor/weight.h

index 71a343f519a8a2e65178696a8be51c7109759b84..67247727ed9488ef0416c2e8aa093e5d7513907a 100644 (file)
--- a/nntrainer/tensor/weight.h
+++ b/nntrainer/tensor/weight.h
@@ -41,7 +41,6 @@ public:
     */
    Weight() :
      Var_Grad(),
-    initializer(Tensor::Initializer::NONE),
      regularizer(WeightRegularizer::UNKNOWN),
      regularizer_constant(1.0f) {}
  
@@ -94,15 +93,9 @@ public:
     */
    explicit Weight(const Tensor &v, const Tensor &g, const std::string &n = "") :
      Var_Grad(v, g, n),
-    initializer(Tensor::Initializer::XAVIER_UNIFORM),
      regularizer(WeightRegularizer::NONE),
      regularizer_constant(1.0f) {}
  
-  /**
-   * @copydoc var_grad::initializeVariable(const Tensor &)
-   */
-  void initializeVariable(const Tensor &preallocated = Tensor());
-
    /**
     * @copydoc var_grad::initializeGradient(const Tensor &)
     */
@@ -118,7 +111,6 @@ public:
    friend void swap(Weight &lhs, Weight &rhs) noexcept {
      using std::swap;
      swap(static_cast<Var_Grad &>(lhs), static_cast<Var_Grad &>(rhs));
-    swap(lhs.initializer, rhs.initializer);
      swap(lhs.regularizer, rhs.regularizer);
    }
  
@@ -179,11 +171,10 @@ public:
     */
    void reset(const TensorDim &dim, const Tensor::Initializer init,
               const WeightRegularizer reg, const float reg_const, bool ng) {
-    initializer = init;
      regularizer = reg;
      regularizer_constant = reg_const;
  
-    Var_Grad::reset(dim, ng);
+    Var_Grad::reset(dim, init, ng);
    }
  
    /**
@@ -213,17 +204,13 @@ public:
    /**
     * @brief Allocate and initialize the weight variable, if needed
     */
-  void allocateVariable() {
-    Var_Grad::allocateVariable();
-    runVariableInitializer();
-  }
+  void allocateVariable() { Var_Grad::allocateVariable(); }
  
    /**
     * @brief Allocate and initialize the weight gradient, if needed
     */
    void allocateGradient() {
      Var_Grad::allocateGradient();
-    resetGradient();
      allocateOptimizerVariables();
    }
  
@@ -275,18 +262,12 @@ public:
    }
  
  private:
-  Tensor::Initializer initializer; /**< initializer for this variable */
-  WeightRegularizer regularizer;   /**< regularizer for this variable */
-  float regularizer_constant;      /**< constant factor for regularization */
+  WeightRegularizer regularizer; /**< regularizer for this variable */
+  float regularizer_constant;    /**< constant factor for regularization */
  
    std::vector<Tensor> opt_vars;        /**< optimizer variables */
    std::vector<TensorDim> opt_vars_dim; /**< optimizer variables dimensions */
  
-  /**
-   * @brief Initialize the weight with the initializer
-   */
-  void runVariableInitializer();
-
    /**
     * @brief Allocate optimizer related variables for the given weights
     */
author	Parichay Kapoor <pk.kapoor@samsung.com>
	Tue, 27 Jul 2021 06:47:48 +0000 (15:47 +0900)
committer	Jijoong Moon <jijoong.moon@samsung.com>
	Tue, 10 Aug 2021 22:46:58 +0000 (07:46 +0900)
nntrainer/tensor/tensor.cpp		patch \| blob \| history
nntrainer/tensor/var_grad.cpp		patch \| blob \| history
nntrainer/tensor/var_grad.h		patch \| blob \| history
nntrainer/tensor/weight.cpp		patch \| blob \| history
nntrainer/tensor/weight.h		patch \| blob \| history