From b6166de637b25f4dd66e6e2479480ffa07b196aa Mon Sep 17 00:00:00 2001
From: Jihoon Lee <jhoon.it.lee@samsung.com>
Date: Sat, 9 Jan 2021 16:04:21 +0900
Subject: [PATCH] [Tensor] Add outplace method for arithmetic ops

Add out-of-place ops that store results in an already allocated tensor.

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee <jhoon.it.lee@samsung.com>
---
 nntrainer/tensor/tensor.cpp | 36 +++++++++++++++++++++++++++++++++---
 nntrainer/tensor/tensor.h   | 26 +++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 4 deletions(-)
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp
index bc2a636..86b0d12 100644
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -183,8 +183,21 @@ Tensor::Tensor(
           this->setValue(i, j, k, l, d[i][j][k][l]);
 }
 
-int Tensor::multiply_i(float const &value) {
+Tensor Tensor::multiply(float const &value, Tensor &out) const {
+  /// @note this is not built on multiply_i, as there is an optimized version
+  /// of multiply_i; each product is written directly into @a out instead
+  CREATE_IF_EMPTY_DIMS(out, getDim());
+  const float *data = getData();
+  /// lambda replaces std::bind2nd (deprecated in C++11, removed in C++17)
+  std::transform(data, data + length(), out.getData(),
+                 [value](float elem) { return elem * value; });
+
+  return out;
+}
 
+Tensor Tensor::multiply(float const &value) { CLONE_OP_I(multiply_i, value); }
+
+int Tensor::multiply_i(float const &value) {
   float *data = getData();
   unsigned int len = length();
 
@@ -192,8 +205,6 @@ int Tensor::multiply_i(float const &value) {
   return ML_ERROR_NONE;
 }
 
-Tensor Tensor::multiply(float const &value) { CLONE_OP_I(multiply_i, value); }
-
 int Tensor::divide_i(float const &value) {
   if (value == 0.0f) {
     return ML_ERROR_INVALID_PARAMETER;
@@ -219,8 +230,27 @@ int Tensor::add_i(float const &value) {
   return ML_ERROR_NONE;
 }
 
+Tensor Tensor::add(Tensor const &m, Tensor &out, float const alpha) const {
+  CREATE_IF_EMPTY_DIMS(out, getDim());
+  scopy(length(), getData(), 1, out.getData(), 1); // BLAS-style copy to out
+  out.add_i(m, alpha);
+  /// NOTE(review): add_i's return value is ignored here - confirm intended
+  return out;
+}
+
 Tensor Tensor::add(float const &value) { CLONE_OP_I(add_i, value); }
 
+Tensor Tensor::add(float const &value, Tensor &out) const {
+  const float *data = getData();
+  /// presumably sets up @a out only when it has no dims yet - TODO confirm
+  CREATE_IF_EMPTY_DIMS(out, getDim());
+  /// lambda replaces std::bind2nd (deprecated in C++11, removed in C++17)
+  std::transform(data, data + length(), out.getData(),
+                 [value](float elem) { return elem + value; });
+
+  return out;
+}
+
 /**
  * @struct External Loop Info for broadcasted info
  * @brief External Loop Info for broadcasted iteration. Please refer to
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h
index db651a5..ee105a8 100644
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -119,7 +119,7 @@ public:
    * @brief     Constructor of Tensor with just width
    * @param[in] width Width of Tensor
    */
-  Tensor(int width) : Tensor(1, 1, 1, width){};
+  explicit Tensor(int width) : Tensor(1, 1, 1, width){};
 
   /**
    * @brief     Constructor of Tensor
@@ -251,6 +251,14 @@ public:
   Tensor multiply(float const &value);
 
   /**
+   * @brief     multiply value element by element
+   * @param[in] value multiplier
+   * @param[out] out out tensor to store the result
+   * @retval    Calculated Tensor
+   */
+  Tensor multiply(float const &value, Tensor &out) const;
+
+  /**
    * @brief     Divide value element by element immediately
    * @param[in] value divisor
    * @retval    #ML_ERROR_INVALID_PARAMETER Tensor dimension is not right
@@ -282,6 +290,14 @@ public:
   Tensor add(Tensor const &m, float const alpha = 1) const;
 
   /**
+   * @brief     Add Tensor Element by Element
+   * @param[in] m Tensor to be added
+   * @param[out] out Tensor to store the result
+   * @retval    Calculated Tensor
+   */
+  Tensor add(Tensor const &m, Tensor &out, float const alpha = 1) const;
+
+  /**
    * @brief Add Tensor Element immediately to target tensor without mem copy
    * @param[in] value value to be added
    * @retval #ML_ERROR_NONE  Successful
@@ -297,6 +313,14 @@ public:
   Tensor add(float const &value);
 
   /**
+   * @brief     Add value Element by Element
+   * @param[in] value value to be added
+   * @param[out] out Tensor to save output without allocating new memory
+   * @retval    Calculated Tensor
+   */
+  Tensor add(float const &value, Tensor &out) const;
+
+  /**
    * @brief     memcpyless version of subtract
    * @param[in] m Tensor to be subtracted
    * @retval #ML_ERROR_NONE  Successful
-- 
2.7.4