[Tensor] Update newly added features

author Donghyeon Jeong <dhyeon.jeong@samsung.com>

Tue, 9 Jul 2024 11:57:46 +0000 (20:57 +0900)

committer Jijoong Moon <jijoong.moon@samsung.com>

Fri, 26 Jul 2024 01:27:26 +0000 (10:27 +0900)
author Donghyeon Jeong <dhyeon.jeong@samsung.com>
Tue, 9 Jul 2024 11:57:46 +0000 (20:57 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Fri, 26 Jul 2024 01:27:26 +0000 (10:27 +0900)
diff --git a/nntrainer/tensor/float_tensor.cpp b/nntrainer/tensor/float_tensor.cpp

index 2652610d15e172107119910a0aeab4268e31f0c3..5ac40ca7817e5bef18bbac70fccfca164bf67e07 100644 (file)
--- a/nntrainer/tensor/float_tensor.cpp
+++ b/nntrainer/tensor/float_tensor.cpp
@@ -396,18 +396,8 @@ Tensor &FloatTensor::multiply(Tensor const &m, Tensor &output,
                                const float beta) const {
    auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf,
                 float *out_buf) {
-    if (e.strides[3] == 1 && output.getStrides()[3] == 1 && strides[3] == 1 &&
-        std::fpclassify(beta) == FP_ZERO) {
-      std::transform(buf, buf + e.buffer_size, m_buf, out_buf,
-                     std::multiplies<float>());
-    } else {
-      for (unsigned int i = 0; i < e.buffer_size; ++i) {
-        *out_buf = *buf * *m_buf + beta * *out_buf;
-        buf += strides[3];
-        m_buf += e.strides[3];
-        out_buf += output.getStrides()[3];
-      }
-    }
+    ele_mul(e.buffer_size, buf, m_buf, out_buf, 1, beta, e.strides[3],
+            strides[3]);
    };
  
    NNTR_THROW_IF(m.getFormat() != this->getFormat(), std::invalid_argument)
@@ -436,17 +426,7 @@ Tensor &FloatTensor::divide(float const &value, Tensor &output) const {
  Tensor &FloatTensor::divide(Tensor const &m, Tensor &output) const {
    auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf,
                 float *out_buf) {
-    if (e.strides[3] == 1 && output.getStrides()[3] == 1 && strides[3] == 1) {
-      std::transform(buf, buf + e.buffer_size, m_buf, out_buf,
-                     std::divides<float>());
-    } else {
-      for (unsigned int i = 0; i < e.buffer_size; ++i) {
-        *out_buf = *buf / *m_buf;
-        buf += strides[3];
-        m_buf += e.strides[3];
-        out_buf += output.getStrides()[3];
-      }
-    }
+    ele_div(e.buffer_size, buf, m_buf, out_buf, 1, 0, e.strides[3], strides[3]);
    };
  
    apply_broadcast(m, f, output);
@@ -522,6 +502,15 @@ int FloatTensor::add_i(Tensor const &m, Tensor &output, float const alpha) {
    return ML_ERROR_NONE;
  }
  
+int FloatTensor::add_i_partial(unsigned int len, unsigned int addr_idx,
+                               Tensor &m, unsigned int incX, unsigned int incY,
+                               const Tensor alphas, unsigned int alpha_idx) {
+  float *dst = (float *)getAddress(addr_idx);           // section start
+  const auto alpha = alphas.getValue<float>(alpha_idx); // selected alpha
+  saxpy(len, alpha, m.getData<float>(), incX, dst, incY);
+  return ML_ERROR_NONE;
+}
+
  Tensor &FloatTensor::add(float const &value, Tensor &output) const {
    auto f = std::bind(std::plus<float>(), std::placeholders::_1, value);
    apply(f, output);
@@ -532,18 +521,8 @@ Tensor &FloatTensor::add(Tensor const &m, Tensor &output,
                           float const alpha) const {
    auto f = [&](const BroadcastInfo &e, const float *buf, const float *m_buf,
                 float *out_buf) {
-    if (e.strides[3] == 1 && strides[3] == 1 && strides[3] == 1 &&
-        std::fpclassify(alpha) == FP_ZERO) {
-      std::transform(buf, buf + e.buffer_size, m_buf, out_buf,
-                     std::plus<float>());
-    } else {
-      for (unsigned int i = 0; i < e.buffer_size; ++i) {
-        *out_buf = *buf + *m_buf * alpha;
-        buf += strides[3];
-        m_buf += e.strides[3];
-        out_buf += strides[3];
-      }
-    }
+    ele_add(e.buffer_size, buf, m_buf, out_buf, alpha, 0, e.strides[3],
+            strides[3]);
    };
    apply_broadcast(m, f, output);
    return output;
diff --git a/nntrainer/tensor/float_tensor.h b/nntrainer/tensor/float_tensor.h

index 5463e9f1ccccb1c0aa349ddd64c71fb112129a9b..1adfebb2264af73aa7036070b462d89ce8f2f12a 100644 (file)
--- a/nntrainer/tensor/float_tensor.h
+++ b/nntrainer/tensor/float_tensor.h
@@ -267,6 +267,13 @@ public:
     */
    int add_i(Tensor const &m, Tensor &output, float const alpha) override;
  
+  /**
+   * @copydoc Tensor::add_i_partial()
+   */
+  int add_i_partial(unsigned int len, unsigned int addr_idx, Tensor &m,
+                    unsigned int incX, unsigned int incY, const Tensor alphas,
+                    unsigned int alpha_idx) override;
+
    /**
     * @copydoc Tensor::add(float const &value, Tensor &output)
     */
diff --git a/nntrainer/tensor/half_tensor.cpp b/nntrainer/tensor/half_tensor.cpp

index 2f66f1c074bee48f0405426e13511c5c965ad609..26ac4e8546369d54ba2ebb98b2ced98eada80d24 100644 (file)
--- a/nntrainer/tensor/half_tensor.cpp
+++ b/nntrainer/tensor/half_tensor.cpp
@@ -395,17 +395,8 @@ Tensor &HalfTensor::multiply(Tensor const &m, Tensor &output,
                               const float beta) const {
    auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf,
                 _FP16 *out_buf) {
-    if (e.strides[3] == 1 && output.getStrides()[3] == 1 && strides[3] == 1 &&
-        std::fpclassify(beta) == FP_ZERO) {
-      ele_mul(e.buffer_size, buf, m_buf, out_buf);
-    } else {
-      for (unsigned int i = 0; i < e.buffer_size; ++i) {
-        *out_buf = *buf * *m_buf + static_cast<_FP16>(beta) * *out_buf;
-        buf += strides[3];
-        m_buf += e.strides[3];
-        out_buf += output.getStrides()[3];
-      }
-    }
+    ele_mul(e.buffer_size, buf, m_buf, out_buf, 1, beta, e.strides[3],
+            strides[3]);
    };
  
    NNTR_THROW_IF(m.getFormat() != this->getFormat(), std::invalid_argument)
@@ -495,6 +486,15 @@ int HalfTensor::add_i(Tensor const &m, Tensor &output, float const alpha) {
    return ML_ERROR_NONE;
  }
  
+int HalfTensor::add_i_partial(unsigned int len, unsigned int addr_idx,
+                              Tensor &m, unsigned int incX, unsigned int incY,
+                              const Tensor alphas, unsigned int alpha_idx) {
+  _FP16 *dst = (_FP16 *)getAddress(addr_idx);           // section start
+  const auto alpha = alphas.getValue<_FP16>(alpha_idx); // selected alpha
+  saxpy(len, alpha, m.getData<_FP16>(), incX, dst, incY);
+  return ML_ERROR_NONE;
+}
+
  Tensor &HalfTensor::add(float const &value, Tensor &output) const {
    auto f = std::bind(std::plus<_FP16>(), std::placeholders::_1,
                       static_cast<_FP16>(value));
@@ -506,16 +506,8 @@ Tensor &HalfTensor::add(Tensor const &m, Tensor &output,
                          float const alpha) const {
    auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf,
                 _FP16 *out_buf) {
-    if (e.strides[3] == 1 && strides[3] == 1 && strides[3] == 1 && alpha == 1) {
-      ele_add(e.buffer_size, buf, m_buf, out_buf);
-    } else {
-      for (unsigned int i = 0; i < e.buffer_size; ++i) {
-        *out_buf = *buf + *m_buf * static_cast<_FP16>(alpha);
-        buf += strides[3];
-        m_buf += e.strides[3];
-        out_buf += strides[3];
-      }
-    }
+    ele_add(e.buffer_size, buf, m_buf, out_buf, alpha, 0, e.strides[3],
+            strides[3]);
    };
    apply_broadcast(m, f, output);
    return output;
@@ -1035,17 +1027,7 @@ Tensor &HalfTensor::divide(float const &value, Tensor &output) const {
  Tensor &HalfTensor::divide(Tensor const &m, Tensor &output) const {
    auto f = [&](const BroadcastInfo &e, const _FP16 *buf, const _FP16 *m_buf,
                 _FP16 *out_buf) {
-    if (e.strides[3] == 1 && output.getStrides()[3] == 1 && strides[3] == 1) {
-      std::transform(buf, buf + e.buffer_size, m_buf, out_buf,
-                     std::divides<_FP16>());
-    } else {
-      for (unsigned int i = 0; i < e.buffer_size; ++i) {
-        *out_buf = *buf / *m_buf;
-        buf += strides[3];
-        m_buf += e.strides[3];
-        out_buf += output.getStrides()[3];
-      }
-    }
+    ele_div(e.buffer_size, buf, m_buf, out_buf, 1, 0, e.strides[3], strides[3]);
    };
  
    apply_broadcast(m, f, output);
@@ -1136,7 +1118,14 @@ Tensor &HalfTensor::transpose(const std::string &direction,
        }
      } else {
        if (is_format_nchw) {
-        transposeloop(l, i, k, j, SL, SI, SK, SJ);
+        for (unsigned int b = 0; b < batch(); ++b) {
+          for (unsigned int c = 0; c < channel(); ++c) {
+            transpose_matrix(
+              height(), width(), (_FP16 *)getData() + getIndex(b, c, 0, 0),
+              width(), (_FP16 *)output.getData() + output.getIndex(b, c, 0, 0),
+              output.width());
+          }
+        }
        } else {
          transposeloop_nhwc(l, k, j, i, SL, SK, SJ, SI);
        }
diff --git a/nntrainer/tensor/half_tensor.h b/nntrainer/tensor/half_tensor.h

index 6ca35e4fd2249d163aeaca69f7a10a6722bba78b..439882076ce3149ff7ce8aefb6f77caa10a852b4 100644 (file)
--- a/nntrainer/tensor/half_tensor.h
+++ b/nntrainer/tensor/half_tensor.h
@@ -267,6 +267,13 @@ public:
     */
    int add_i(Tensor const &m, Tensor &output, float const alpha) override;
  
+  /**
+   * @copydoc Tensor::add_i_partial()
+   */
+  int add_i_partial(unsigned int len, unsigned int addr_idx, Tensor &m,
+                    unsigned int incX, unsigned int incY, const Tensor alphas,
+                    unsigned int alpha_idx) override;
+
    /**
     * @copydoc Tensor::add(float const &value, Tensor &output)
     */
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp

index 28bc271f7d5f64b16c8d358ae0ce99f5f9a053b9..2c815e2913006ab33434e2917c4e98661937b378 100644 (file)
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -251,6 +251,19 @@ Tensor Tensor::multiply(Tensor const &m, const float beta) const {
  
  Tensor &Tensor::multiply(Tensor const &m, Tensor &output,
                           const float beta) const {
+  // Both tensors must use the same layout; the message names each format.
+  NNTR_THROW_IF(m.getFormat() != this->getFormat(), std::invalid_argument)
+    << "Tensor Format of " << getName() << ":"
+    << ((bool)(this->getFormat()) ? "NHWC" : "NCHW") << " is not match. ("
+    << ((bool)(m.getFormat()) ? "NHWC" : "NCHW") << ")";
+
+  // One contiguity check covers this tensor, m and output before the call is
+  // handed to itensor.
+  NNTR_THROW_IF(!getContiguous() || !m.getContiguous() ||
+                  !output.getContiguous(),
+                std::invalid_argument)
+    << getName() << " is not contiguous, cannot multiply";
+
    itensor->multiply(m, output, beta);
    return output;
  }
@@ -355,19 +368,8 @@ int Tensor::add_i(Tensor const &m, float const alpha) {
  int Tensor::add_i_partial(unsigned int len, unsigned int addr_idx, Tensor &m,
                            unsigned int incX, unsigned int incY,
                            const Tensor alphas, unsigned int alpha_idx) {
-  if (dim.getDataType() == ml::train::TensorDim::DataType::FP32) {
-    saxpy(len, alphas.getValue<float>(alpha_idx), m.getData<float>(), incX,
-          getAddress<float>(addr_idx), incY);
-  } else if (dim.getDataType() == ml::train::TensorDim::DataType::FP16) {
-#ifdef ENABLE_FP16
-    saxpy(len, alphas.getValue<_FP16>(alpha_idx), m.getData<_FP16>(), incX,
-          getAddress<_FP16>(addr_idx), incY);
-#else
-    ml_loge("%s", "Error: enable-fp16 is not enabled");
-    return ML_ERROR_INVALID_PARAMETER;
-#endif
-  }
-  return ML_ERROR_NONE;
+  return itensor->add_i_partial(len, addr_idx, m, incX, incY, alphas,
+                                alpha_idx);
  }
  
  Tensor Tensor::add(Tensor const &m, float const alpha) const {
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h

index 789a4eb6cb2689ba314acea6368db2a7115170f4..bfd98978405e8b0a21688e000641951c74ff3d7e 100644 (file)
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -23,6 +23,7 @@
  
  #include <cstddef>
  
+#include <blas_interface.h>
  #include <nntrainer_log.h>
  #include <tensor_base.h>
  
@@ -735,19 +736,19 @@ public:
     */
    int add_i(Tensor const &m, float const alpha = 1.F);
  
-/**
- * @brief Do add_i for specific section
- * 
- * @param len Length of the specific section
- * @param addr_idx Starting index of the psecific section
- * @param m Input Tensor to be added
- * @param incX Incremental index of X
- * @param incY Incremental index of Y
- * @param alphas Vector of multiple alpha values
- * @param alpha_idx Index of alpha in alpha vector
- * @retval #ML_ERROR_NONE  Successful
- * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
- */
+  /**
+   * @brief Do add_i for specific section
+   *
+   * @param len Length of the specific section
+   * @param addr_idx Starting index of the specific section
+   * @param m Input Tensor to be added
+   * @param incX Incremental index of X
+   * @param incY Incremental index of Y
+   * @param alphas Vector of multiple alpha values
+   * @param alpha_idx Index of alpha in alpha vector
+   * @retval #ML_ERROR_NONE  Successful
+   * @retval #ML_ERROR_INVALID_PARAMETER Invalid Parameter
+   */
    int add_i_partial(unsigned int len, unsigned int addr_idx, Tensor &m,
                      unsigned int incX, unsigned int incY, const Tensor alphas,
                      unsigned int alpha_idx);
diff --git a/nntrainer/tensor/tensor_base.h b/nntrainer/tensor/tensor_base.h

index 945b82b3c693e47d1bbf7693b6eaf6a8963f5d3d..1831de0a8fac93f3f780d7607ce1ab3b254e7549 100644 (file)
--- a/nntrainer/tensor/tensor_base.h
+++ b/nntrainer/tensor/tensor_base.h
@@ -283,6 +283,13 @@ public:
     */
    virtual int add_i(Tensor const &m, Tensor &output, float const alpha) = 0;
  
+  /**
+   * @copydoc Tensor::add_i_partial()
+   */
+  virtual int add_i_partial(unsigned int len, unsigned int addr_idx, Tensor &m,
+                            unsigned int incX, unsigned int incY,
+                            const Tensor alphas, unsigned int alpha_idx) = 0;
+
    /**
     * @copydoc Tensor::add(float const &value, Tensor &output)
     */
author	Donghyeon Jeong <dhyeon.jeong@samsung.com>
	Tue, 9 Jul 2024 11:57:46 +0000 (20:57 +0900)
committer	Jijoong Moon <jijoong.moon@samsung.com>
	Fri, 26 Jul 2024 01:27:26 +0000 (10:27 +0900)
nntrainer/tensor/float_tensor.cpp		patch \| blob \| history
nntrainer/tensor/float_tensor.h		patch \| blob \| history
nntrainer/tensor/half_tensor.cpp		patch \| blob \| history
nntrainer/tensor/half_tensor.h		patch \| blob \| history
nntrainer/tensor/tensor.cpp		patch \| blob \| history
nntrainer/tensor/tensor.h		patch \| blob \| history
nntrainer/tensor/tensor_base.h		patch \| blob \| history