[Tensor] ShortTensor class with unsigned 16-bit integer

author Donghyeon Jeong <dhyeon.jeong@samsung.com>

Mon, 5 Aug 2024 10:49:22 +0000 (19:49 +0900)

committer MyungJoo Ham <myungjoo.ham@samsung.com>

Tue, 27 Aug 2024 04:38:46 +0000 (13:38 +0900)
author Donghyeon Jeong <dhyeon.jeong@samsung.com>
Mon, 5 Aug 2024 10:49:22 +0000 (19:49 +0900)
committer MyungJoo Ham <myungjoo.ham@samsung.com>
Tue, 27 Aug 2024 04:38:46 +0000 (13:38 +0900)
diff --git a/api/ccapi/include/tensor_dim.h b/api/ccapi/include/tensor_dim.h

index 64523618c12fa345369b2f9448917bac9250b181..7324581953b9e20c62c075f87b93f0d04f17b460 100644 (file)
--- a/api/ccapi/include/tensor_dim.h
+++ b/api/ccapi/include/tensor_dim.h
@@ -48,14 +48,15 @@ public:
    enum class Format { NCHW, NHWC };
  
    /**
-   * @brief Tensor Data Type. Currently QINT4, QINT8, FP16 & FP32 Support
-   *
+   * @brief Tensor Data Type.
+   * Currently supports QINT4, QINT8, UINT16, FP16 & FP32
     */
    enum class DataType {
-    QINT4, /** quantized int 4*/
-    QINT8, /** quantized int 8*/
-    FP16,  /** half precision */
-    FP32   /** single precision */
+    QINT4,  /** quantized int 4*/
+    QINT8,  /** quantized int 8*/
+    UINT16, /** unsigned int 16 bit */
+    FP16,   /** half precision */
+    FP32    /** single precision */
    };
  
    /**
@@ -97,9 +98,7 @@ public:
       */
      TensorType(Format fm, DataType d_type,
                 StorageOrder order = StorageOrder::ROW_MAJOR) :
-      format(fm),
-      data_type(d_type),
-      storage_order(order){};
+      format(fm), data_type(d_type), storage_order(order){};
    };
  
    /**
@@ -113,7 +112,7 @@ public:
     * @brief     Creator of TensorDim with Format & DataType
     *
     * @param fm format NCHW | HNWC
-   * @param d_type DataType QINT4 | QINT8 | FP16 | FP32
+   * @param d_type DataType QINT4 | QINT8 | UINT16 | FP16 | FP32
     * @param eff_dim_flag_ effective dimension flag (1 means it's effective)
     * @param dyn_dim_flag_ dynamic dimension flag (1 means it's unspecified)
     */
@@ -216,7 +215,7 @@ public:
     * @param h height
     * @param w width
     * @param fm format NCHW | HNWC
-   * @param d_type Data Type QINT4 | QINT8 | FP16 | FP32
+   * @param d_type Data Type QINT4 | QINT8 | UINT16 | FP16 | FP32
     * @param eff_dim_flag_ dimension bit flag to calculate the dynamic
     * dimension, rightmost is width
     */
@@ -245,7 +244,7 @@ public:
     *
     * @param shape shape of format
     * @param fm format NCHW | HNWC
-   * @param d_type data type QINT4 | QINT8 | FP16 | FP32
+   * @param d_type data type QINT4 | QINT8 | UINT16 | FP16 | FP32
     * @param order data storage order ROW_MAJOR | COL_MAJOR
     */
    TensorDim(const std::string &shape, TensorDim::Format fm,
diff --git a/debian/nntrainer-dev.install b/debian/nntrainer-dev.install

index 9517b6876fc501b3a4a985e34cae14505412b6e3..6390d02b0aaaf77391a64d6f23857cb6a81c5192 100644 (file)
--- a/debian/nntrainer-dev.install
+++ b/debian/nntrainer-dev.install
@@ -11,6 +11,7 @@
  /usr/include/nntrainer/tensor.h
  /usr/include/nntrainer/tensor_base.h
  /usr/include/nntrainer/char_tensor.h
+/usr/include/nntrainer/short_tensor.h
  /usr/include/nntrainer/float_tensor.h
  /usr/include/nntrainer/tensor_wrap_specs.h
  /usr/include/nntrainer/blas_interface.h
diff --git a/nntrainer/tensor/meson.build b/nntrainer/tensor/meson.build

index 1e69ad907a7043f4e7f7b1cc49daa3c0919e3db7..19c32096b9ba37eb8501e21e06dc4dd1c7253133 100644 (file)
--- a/nntrainer/tensor/meson.build
+++ b/nntrainer/tensor/meson.build
@@ -9,6 +9,7 @@ tensor_sources = [
    'tensor_base.cpp',
    'float_tensor.cpp',
    'char_tensor.cpp',
+  'short_tensor.cpp',
    'tensor_dim.cpp',
    'var_grad.cpp',
    'weight.cpp',
@@ -28,6 +29,7 @@ tensor_headers = [
    'tensor_base.h',
    'float_tensor.h',
    'char_tensor.h',
+  'short_tensor.h',
    'weight.h',
    'var_grad.h',    
    'tensor_wrap_specs.h',
diff --git a/nntrainer/tensor/short_tensor.cpp b/nntrainer/tensor/short_tensor.cpp

new file mode 100644 (file)

index 0000000..8705b10
--- /dev/null
+++ b/nntrainer/tensor/short_tensor.cpp
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * @file       short_tensor.cpp
+ * @date       02 April 2024
+ * @brief      This is ShortTensor class for 16-bit unsigned integer calculation
+ * @see                https://github.com/nnstreamer/nntrainer
+ * @author     Donghyeon Jeong <dhyeon.jeong@samsung.com>
+ * @bug                No known bugs except for NYI items
+ */
+
+#include <algorithm>
+#include <iomanip>
+#include <iostream>
+
+#include <blas_interface.h>
+#include <short_tensor.h>
+#include <tensor.h>
+
+namespace nntrainer {
+
+/**
+ * @brief Construct a ShortTensor from a name and a format; the data type
+ *        handed to TensorBase is always UINT16
+ * @param name_ tensor name forwarded to TensorBase
+ * @param fm tensor format forwarded to TensorBase
+ */
+ShortTensor::ShortTensor(std::string name_, Tformat fm) :
+  TensorBase(name_, fm, Tdatatype::UINT16) {
+}
+
+/**
+ * @brief Construct a ShortTensor for the given dimension
+ * @param d tensor dimension forwarded to TensorBase
+ * @param alloc_now when true, allocate() is called before returning
+ * @param init initializer forwarded to TensorBase
+ * @param name tensor name forwarded to TensorBase
+ */
+ShortTensor::ShortTensor(const TensorDim &d, bool alloc_now, Initializer init,
+                         std::string name) :
+  TensorBase(d, alloc_now, init, name) {
+  if (!alloc_now)
+    return;
+
+  allocate();
+}
+
+/**
+ * @brief Construct an allocated ShortTensor and optionally fill it from a
+ *        raw buffer
+ * @param d tensor dimension
+ * @param buf source buffer; nothing is copied when it is nullptr or when
+ *            d.getDataLen() is zero
+ */
+ShortTensor::ShortTensor(const TensorDim &d, const void *buf) :
+  ShortTensor(d, true) {
+  const bool has_payload = (d.getDataLen() != 0) && (buf != nullptr);
+  if (has_payload)
+    copy(buf);
+}
+
+/**
+ * @brief Construct a ShortTensor from a four-level nested vector
+ * @param d nested values, indexed d[batch][channel][height][width] when
+ *          @a fm is NCHW and d[batch][height][width][channel] otherwise
+ * @param fm tensor format used to interpret @a d
+ * @throws std::out_of_range if d, d[0], d[0][0] or d[0][0][0] is empty
+ * @note The extents are read from the first entry of each nesting level and
+ *       every other entry is indexed with those extents without a size check.
+ */
+ShortTensor::ShortTensor(
+  std::vector<std::vector<std::vector<std::vector<uint16_t>>>> const &d,
+  Tformat fm) {
+  if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
+    throw std::out_of_range(
+      "[Tensor] trying to initialize ShortTensor from empty vector");
+  }
+
+  dim.setTensorDim(0, d.size());
+  if (fm == Tformat::NCHW) {
+    dim.setTensorDim(1, d[0].size());
+    dim.setTensorDim(2, d[0][0].size());
+    dim.setTensorDim(3, d[0][0][0].size());
+  } else {
+    dim.setTensorDim(2, d[0].size());
+    dim.setTensorDim(3, d[0][0].size());
+    dim.setTensorDim(1, d[0][0][0].size());
+  }
+
+  dim.setTensorType({fm, Tdatatype::UINT16});
+
+  strides = dim.computeStrides();
+  contiguous = true;
+  initializer = Initializer::NONE;
+
+  // The deleter owns both allocations: the element buffer and the MemoryData
+  // wrapper. Releasing only the buffer would leak the wrapper for every
+  // tensor built through this constructor (allocate() frees both as well).
+  MemoryData *mem_data =
+    new MemoryData((void *)(new uint16_t[dim.getDataLen()]()));
+  data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
+    delete[] mem_data->getAddr<uint16_t>();
+    delete mem_data;
+  });
+
+  offset = 0;
+
+  // d is nested as [batch][channel][height][width] for NCHW and as
+  // [batch][height][width][channel] for NHWC, while setValue() always takes
+  // its coordinates in (batch, channel, height, width) order.
+  if (fm == Tformat::NCHW) {
+    for (unsigned int i = 0; i < batch(); ++i)
+      for (unsigned int j = 0; j < channel(); ++j)
+        for (unsigned int k = 0; k < height(); ++k)
+          for (unsigned int l = 0; l < width(); ++l)
+            this->setValue(i, j, k, l, d[i][j][k][l]);
+  } else {
+    for (unsigned int i = 0; i < batch(); ++i)
+      for (unsigned int j = 0; j < height(); ++j)
+        for (unsigned int k = 0; k < width(); ++k)
+          for (unsigned int l = 0; l < channel(); ++l)
+            this->setValue(i, l, j, k, d[i][j][k][l]);
+  }
+}
+
+/**
+ * @brief Element-wise comparison of the stored values
+ * @param rhs tensor to compare against
+ * @return true when the first size() elements of both tensors are equal
+ * @note Only the data is compared and only size() of this tensor is used as
+ *       the element count; the dimensions of @a rhs are not checked here.
+ */
+bool ShortTensor::operator==(const ShortTensor &rhs) const {
+  const uint16_t *lhs_values = (uint16_t *)getData();
+  const uint16_t *rhs_values = (uint16_t *)rhs.getData();
+
+  size_t pos = 0;
+  while (pos < size()) {
+    if (lhs_values[pos] != rhs_values[pos])
+      return false;
+    ++pos;
+  }
+
+  return true;
+}
+
+/**
+ * @brief Allocate storage for this tensor if it does not have any yet
+ * @note Does nothing when the tensor is empty or already holds data. When a
+ *       source tensor is set the memory comes from allocateSrcTensor() and
+ *       is not initialized here because it is shared.
+ */
+void ShortTensor::allocate() {
+  if (empty() || data)
+    return;
+
+  if (src_tensor) {
+    /// memory is shared with the source tensor, so do NOT initialize it
+    allocateSrcTensor();
+    return;
+  }
+
+  /// own a fresh zero-filled buffer; the deleter releases the buffer and
+  /// the MemoryData wrapper around it
+  auto release = [](auto *mem) {
+    delete[] mem->template getAddr<uint16_t>();
+    delete mem;
+  };
+  MemoryData *fresh =
+    new MemoryData((void *)(new uint16_t[dim.getDataLen()]{}));
+  data = std::shared_ptr<MemoryData>(fresh, release);
+
+  offset = 0;
+  initialize();
+}
+
+/**
+ * @brief Drop this tensor's reference to its data and reset the offset
+ */
+void ShortTensor::deallocate() {
+  offset = 0;
+  data.reset();
+}
+
+/**
+ * @brief Get the address of the first element of this tensor
+ * @return pointer to the data at the tensor's offset, or nullptr when no
+ *         data is attached
+ */
+void *ShortTensor::getData() const {
+  if (data) {
+    data->validate();
+    uint16_t *base = data->getAddr<uint16_t>();
+    return base + offset;
+  }
+
+  return nullptr;
+}
+
+/**
+ * @brief Get the address of the element @a idx positions past the start
+ * @param idx element index added to the tensor's offset
+ * @return pointer to that element, or nullptr when no data is attached
+ */
+void *ShortTensor::getData(size_t idx) const {
+  if (data) {
+    data->validate();
+    uint16_t *base = data->getAddr<uint16_t>();
+    return base + offset + idx;
+  }
+
+  return nullptr;
+}
+
+/**
+ * @brief Get the address of the i-th element
+ * @param i element index
+ * @return address of the element, or nullptr when @a i is greater than
+ *         getIndex(batch(), channel(), height(), width())
+ * @note NOTE(review): that bound looks larger than size(), so indices past
+ *       the last element may still be accepted - confirm against getIndex().
+ */
+void *ShortTensor::getAddress(unsigned int i) {
+  const size_t limit = getIndex(batch(), channel(), height(), width());
+  if (i <= limit) {
+    return &((uint16_t *)getData())[i];
+  }
+  return nullptr;
+}
+
+/**
+ * @brief Get the read-only address of the i-th element
+ * @param i element index
+ * @return address of the element, or nullptr when @a i is greater than
+ *         getIndex(batch(), channel(), height(), width())
+ * @note NOTE(review): that bound looks larger than size(), so indices past
+ *       the last element may still be accepted - confirm against getIndex().
+ */
+const void *ShortTensor::getAddress(unsigned int i) const {
+  const size_t limit = getIndex(batch(), channel(), height(), width());
+  if (i <= limit) {
+    return &((uint16_t *)getData())[i];
+  }
+  return nullptr;
+}
+
+/**
+ * @brief Read-only access to the element at flat index @a i
+ * @param i flat element index; not range-checked
+ * @return const reference to the stored value
+ */
+const uint16_t &ShortTensor::getValue(unsigned int i) const {
+  const uint16_t *values = (uint16_t *)getData();
+  return values[i];
+}
+
+/**
+ * @brief Mutable access to the element at flat index @a i
+ * @param i flat element index; not range-checked
+ * @return reference to the stored value
+ */
+uint16_t &ShortTensor::getValue(unsigned int i) {
+  uint16_t *values = (uint16_t *)getData();
+  return values[i];
+}
+
+/**
+ * @brief Read-only access to the element at (b, c, h, w)
+ * @param b batch location
+ * @param c channel location
+ * @param h height location
+ * @param w width location
+ * @return const reference to the value at the index given by getIndex()
+ */
+const uint16_t &ShortTensor::getValue(unsigned int b, unsigned int c,
+                                      unsigned int h, unsigned int w) const {
+  const auto flat = getIndex(b, c, h, w);
+  return getValue(flat);
+}
+
+/**
+ * @brief Mutable access to the element at (b, c, h, w)
+ * @param b batch location
+ * @param c channel location
+ * @param h height location
+ * @param w width location
+ * @return reference to the value at the index given by getIndex()
+ */
+uint16_t &ShortTensor::getValue(unsigned int b, unsigned int c, unsigned int h,
+                                unsigned int w) {
+  const auto flat = getIndex(b, c, h, w);
+  return getValue(flat);
+}
+
+/**
+ * @brief Assign @a value to every element of the tensor
+ * @param value value to store; it is converted to uint16_t on assignment
+ */
+void ShortTensor::setValue(float value) {
+  uint16_t *first = (uint16_t *)getData();
+  uint16_t *last = first + size();
+  std::fill(first, last, value);
+}
+
+/**
+ * @brief Update one element in place as trunc(old * beta + value)
+ * @param b batch location
+ * @param c channel location
+ * @param h height location
+ * @param w width location
+ * @param value value added after scaling
+ * @param beta factor applied to the current element value
+ * @note The arithmetic is done in float and the truncated result is stored
+ *       back as uint16_t; no range clamping is performed here.
+ */
+void ShortTensor::addValue(unsigned int b, unsigned int c, unsigned int h,
+                           unsigned int w, float value, float beta) {
+  auto const &pos = getIndex(b, c, h, w);
+
+  float acc = ((uint16_t *)getData())[pos];
+  acc = acc * beta;
+  acc = acc + value;
+
+  ((uint16_t *)getData())[pos] = std::trunc(acc);
+}
+
+/**
+ * @brief Store @a value at (b, c, h, w)
+ * @param b batch location
+ * @param c channel location
+ * @param h height location
+ * @param w width location
+ * @param value value to store; cast to uint16_t before the write
+ */
+void ShortTensor::setValue(unsigned int b, unsigned int c, unsigned int h,
+                           unsigned int w, float value) {
+  uint16_t *values = (uint16_t *)getData();
+  values[getIndex(b, c, h, w)] = (uint16_t)value;
+}
+
+/**
+ * @brief Set every element of the tensor to zero
+ */
+void ShortTensor::setZero() {
+  /// @todo replace with apply_i or scal
+  setValue(0.0f);
+}
+
+/**
+ * @brief Fill the tensor according to the stored initializer
+ * @throws std::invalid_argument for any initializer other than ZEROS, ONES
+ *         or NONE
+ * @note Returns without doing anything when the tensor is empty or not
+ *       allocated. Sampling from the normal/uniform distribution is invalid
+ *       for this tensor type.
+ */
+void ShortTensor::initialize() {
+  const bool usable = !empty() && isAllocated();
+  if (!usable)
+    return;
+
+  if (initializer == Initializer::ZEROS) {
+    setZero();
+  } else if (initializer == Initializer::ONES) {
+    setValue(1.0f);
+  } else if (initializer != Initializer::NONE) {
+    throw std::invalid_argument("Initializer not valid for " +
+                                getStringDataType());
+  }
+
+  putData();
+}
+
+/**
+ * @brief Store @a init as the tensor's initializer and apply it
+ * @param init initializer to remember and run
+ */
+void ShortTensor::initialize(Initializer init) {
+  this->initializer = init;
+  this->initialize();
+}
+
+/**
+ * @brief Reshape this tensor to the dimension of @a from and copy its data
+ * @param from source tensor
+ */
+void ShortTensor::copy(const Tensor &from) {
+  const TensorDim &target_dim = from.getDim();
+  reshape(target_dim);
+
+  const void *src = from.getData();
+  copy(src);
+}
+
+/**
+ * @brief Copy the elements of @a from into this tensor without reshaping
+ * @param from source tensor; must hold the same number of elements
+ * @throws std::invalid_argument if this tensor is not contiguous, if the
+ *         sizes differ, or if @a from is not a UINT16 tensor
+ */
+void ShortTensor::copyData(const Tensor &from) {
+  NNTR_THROW_IF(!contiguous, std::invalid_argument)
+    << getName() << " is not contiguous, cannot copy.";
+
+  NNTR_THROW_IF(size() != from.size(), std::invalid_argument)
+    << "Size of tensor to copy must match";
+
+  /// @todo support copy from other data types
+  switch (from.getDataType()) {
+  case ml::train::TensorDim::DataType::UINT16:
+    copy(from.getData());
+    // leave the switch here: without this break a successful copy fell
+    // through into the default branch and always threw
+    break;
+  default:
+    throw std::invalid_argument("Error: Unsupported data type");
+  }
+}
+
+/**
+ * @brief Copy @a input into @a output one element at a time through the
+ *        coordinate-based accessors
+ * @param input tensor read with getValue<uint16_t>(b, c, h, w)
+ * @param output tensor written with setValue(b, c, h, w, value); its
+ *               extents define the iteration range
+ */
+void ShortTensor::copy_with_stride(const Tensor &input, Tensor &output) {
+  for (unsigned int bi = 0; bi < output.batch(); ++bi) {
+    for (unsigned int ci = 0; ci < output.channel(); ++ci) {
+      for (unsigned int hi = 0; hi < output.height(); ++hi) {
+        for (unsigned int wi = 0; wi < output.width(); ++wi) {
+          const auto element = input.getValue<uint16_t>(bi, ci, hi, wi);
+          output.setValue(bi, ci, hi, wi, element);
+        }
+      }
+    }
+  }
+}
+
+/**
+ * @brief Find the position of the largest element of every batch
+ * @return one entry per batch holding the index, relative to the start of
+ *         that batch, of its first maximum element
+ */
+std::vector<unsigned int> ShortTensor::argmax() const {
+  const uint16_t *base = (uint16_t *)getData();
+  const size_t n_batch = batch();
+  const size_t per_batch = dim.getFeatureLen();
+
+  std::vector<unsigned int> winners;
+  winners.resize(n_batch);
+
+  for (size_t b = 0; b < n_batch; ++b) {
+    const uint16_t *first = base + b * per_batch;
+    const uint16_t *best = std::max_element(first, first + per_batch);
+    winners[b] = std::distance(first, best);
+  }
+  return winners;
+}
+
+/**
+ * @brief Largest absolute value stored in the tensor
+ * @return the maximum element converted to float
+ * @note The elements are unsigned, so the absolute value of an element is
+ *       the element itself and this reduces to a plain maximum. As before,
+ *       the tensor is expected to hold at least one element.
+ */
+float ShortTensor::max_abs() const {
+  const uint16_t *data = (uint16_t *)getData();
+  return *std::max_element(data, data + size());
+}
+
+/**
+ * @brief Largest element of the tensor
+ * @return the maximum element converted to float
+ */
+float ShortTensor::maxValue() const {
+  const uint16_t *first = (uint16_t *)getData();
+  const uint16_t *last = first + size();
+  return *std::max_element(first, last);
+}
+
+/**
+ * @brief Smallest element of the tensor
+ * @return the minimum element converted to float
+ */
+float ShortTensor::minValue() const {
+  const uint16_t *first = (uint16_t *)getData();
+  const uint16_t *last = first + size();
+  return *std::min_element(first, last);
+}
+
+/**
+ * @brief Write the data address, the dimension and the values to @a out
+ * @param out destination stream
+ * @note Tensors with more than 512 elements are abbreviated to their first
+ *       and last three values. Smaller tensors are printed in full, in the
+ *       nesting order that matches the tensor format.
+ */
+void ShortTensor::print(std::ostream &out) const {
+  const uint16_t *data = (uint16_t *)getData();
+  unsigned int len = size();
+  // the address is printed as an untyped pointer; the elements are uint16_t,
+  // not float
+  out << "data addr: " << static_cast<const void *>(data) << '\n';
+  out << dim;
+
+  if (len > 512) {
+    out << '[' << (int)data[0] << ' ' << (int)data[1] << ' ' << (int)data[2]
+        << " ... " << (int)data[len - 3] << ' ' << (int)data[len - 2] << ' '
+        << (int)data[len - 1] << ']' << std::endl;
+    return;
+  }
+
+  // remember the caller's stream formatting so it can be put back afterwards
+  std::ios init(nullptr);
+  init.copyfmt(out);
+  if (getFormat() == Tformat::NCHW) {
+    for (unsigned int k = 0; k < batch(); k++) {
+      for (unsigned int l = 0; l < channel(); l++) {
+        for (unsigned int i = 0; i < height(); i++) {
+          for (unsigned int j = 0; j < width(); j++) {
+            out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
+          }
+          out << std::endl;
+        }
+        out << std::endl;
+      }
+      out << "-------" << std::endl;
+    }
+  } else {
+    for (unsigned int k = 0; k < batch(); k++) {
+      for (unsigned int i = 0; i < height(); i++) {
+        for (unsigned int j = 0; j < width(); j++) {
+          for (unsigned int l = 0; l < channel(); l++) {
+            out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
+          }
+          out << std::endl;
+        }
+        out << std::endl;
+      }
+      out << "-------" << std::endl;
+    }
+  }
+  // restore the formatting for both layouts, not only for NHWC
+  out.copyfmt(init);
+}
+
+/**
+ * @brief Copy size() uint16_t elements from a raw buffer into this tensor
+ * @param buf source buffer holding at least size() uint16_t values
+ * @throws std::invalid_argument if this tensor is not contiguous
+ * @note Copying from the tensor's own data address is a no-op.
+ */
+void ShortTensor::copy(const void *buf) {
+  NNTR_THROW_IF(!contiguous, std::invalid_argument)
+    << getName() << " is not contiguous, cannot copy.";
+
+  // resolve the destination once instead of calling getData() per element
+  uint16_t *dst = (uint16_t *)getData();
+  if (buf == dst) {
+    return;
+  }
+
+  const uint16_t *src = (const uint16_t *)buf;
+  const size_t count = size();
+  for (size_t i = 0; i < count; ++i) {
+    dst[i] = src[i];
+  }
+}
+
+} // namespace nntrainer
diff --git a/nntrainer/tensor/short_tensor.h b/nntrainer/tensor/short_tensor.h

new file mode 100644 (file)

index 0000000..c27e68b
--- /dev/null
+++ b/nntrainer/tensor/short_tensor.h
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: Apache-2.0
+/**
+ * @file       short_tensor.h
+ * @date       02 April 2024
+ * @brief      This is ShortTensor class for 16-bit unsigned integer calculation
+ * @see                https://github.com/nnstreamer/nntrainer
+ * @author     Donghyeon Jeong <dhyeon.jeong@samsung.com>
+ * @bug                No known bugs except for NYI items
+ */
+
+#ifndef __SHORT_TENSOR_H__
+#define __SHORT_TENSOR_H__
+#ifdef __cplusplus
+
+#include <tensor_base.h>
+
+namespace nntrainer {
+
+/**
+ * @class ShortTensor class
+ * @brief ShortTensor class for 16-bit unsigned integer calculation
+ */
+class ShortTensor : public TensorBase {
+public:
+  /**
+   * @brief     Basic Constructor of ShortTensor
+   * @param name_ Name of the tensor
+   * @param fm Format of the tensor
+   */
+  ShortTensor(std::string name_ = "", Tformat fm = Tformat::NCHW);
+
+  /**
+   * @brief Construct a new ShortTensor object
+   *
+   * @param d Tensor dim for this tensor
+   * @param alloc_now Allocate memory to this tensor or not
+   * @param init Initializer for the tensor
+   * @param name Name of the tensor
+   */
+  ShortTensor(const TensorDim &d, bool alloc_now,
+              Initializer init = Initializer::NONE, std::string name = "");
+
+  /**
+   * @brief Construct a new ShortTensor object
+   *
+   * @param d Tensor dim for this tensor
+   * @param buf buffer to copy the initial values from (may be nullptr)
+   */
+  ShortTensor(const TensorDim &d, const void *buf = nullptr);
+
+  /**
+   * @brief Construct a new ShortTensor object
+   *
+   * @param d data for the Tensor
+   * @param fm format for the Tensor
+   */
+  ShortTensor(
+    std::vector<std::vector<std::vector<std::vector<uint16_t>>>> const &d,
+    Tformat fm);
+
+  /**
+   * @brief Construct a new ShortTensor object
+   * @param rhs TensorBase object to copy
+   */
+  ShortTensor(TensorBase &rhs) : TensorBase(rhs) {}
+
+  /**
+   * @brief Basic Destructor
+   */
+  ~ShortTensor() {}
+
+  /**
+   * @brief     Comparison operator overload
+   * @param[in] rhs Tensor to be compared with
+   * @retval    true if the tensor data are equal
+   * @note      Only compares Tensor data
+   */
+  bool operator==(const ShortTensor &rhs) const;
+
+  /**
+   * @brief     Comparison operator overload
+   * @param[in] rhs Tensor to be compared with
+   * @retval    true if the tensor data differ
+   * @note      Only compares Tensor data
+   */
+  bool operator!=(const ShortTensor &rhs) const { return !(*this == rhs); }
+
+  /**
+   * @copydoc Tensor::allocate()
+   */
+  void allocate() override;
+
+  /**
+   * @copydoc Tensor::deallocate()
+   */
+  void deallocate() override;
+
+  /**
+   * @copydoc Tensor::getData()
+   */
+  void *getData() const override;
+
+  /**
+   * @copydoc Tensor::getData(size_t idx)
+   */
+  void *getData(size_t idx) const override;
+
+  /**
+   * @brief     Get the address of the i-th element
+   * @param[in] i data index
+   * @retval    address of ith data
+   */
+  void *getAddress(unsigned int i) override;
+
+  /**
+   * @brief     Get the read-only address of the i-th element
+   * @param[in] i data index
+   * @retval    address of ith data
+   */
+  const void *getAddress(unsigned int i) const override;
+
+  /**
+   * @brief     return value at specific location
+   * @param[in] i index
+   * @retval    const reference to the value at index i
+   */
+  const uint16_t &getValue(unsigned int i) const;
+
+  /**
+   * @brief     return value at specific location
+   * @param[in] i index
+   * @retval    reference to the value at index i
+   */
+  uint16_t &getValue(unsigned int i);
+
+  /**
+   * @brief     return value at specific location
+   * @param[in] b batch location
+   * @param[in] c channel location
+   * @param[in] h height location
+   * @param[in] w width location
+   * @retval    const reference to the value at (b, c, h, w)
+   */
+  const uint16_t &getValue(unsigned int b, unsigned int c, unsigned int h,
+                           unsigned int w) const;
+
+  /**
+   * @brief     return value at specific location
+   * @param[in] b batch location
+   * @param[in] c channel location
+   * @param[in] h height location
+   * @param[in] w width location
+   * @retval    reference to the value at (b, c, h, w)
+   */
+  uint16_t &getValue(unsigned int b, unsigned int c, unsigned int h,
+                     unsigned int w);
+
+  /**
+   * @copydoc Tensor::setValue(float value)
+   */
+  void setValue(float value) override;
+
+  /**
+   * @copydoc Tensor::setValue(b, c, h, w, value)
+   */
+  void setValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w,
+                float value) override;
+
+  /**
+   * @copydoc Tensor::addValue(b, c, h, w, value, beta)
+   */
+  void addValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w,
+                float value, float beta) override;
+
+  /**
+   * @copydoc Tensor::setZero()
+   */
+  void setZero() override;
+
+  /**
+   * @copydoc Tensor::initialize()
+   */
+  void initialize() override;
+
+  /**
+   * @copydoc Tensor::initialize(Initializer init)
+   */
+  void initialize(Initializer init) override;
+
+  /**
+   * @copydoc Tensor::copy(const Tensor &from)
+   */
+  void copy(const Tensor &from) override;
+
+  /**
+   * @copydoc Tensor::copyData(const Tensor &from)
+   */
+  void copyData(const Tensor &from) override;
+
+  /**
+   * @copydoc Tensor::copy_with_stride()
+   */
+  void copy_with_stride(const Tensor &input, Tensor &output) override;
+
+  /**
+   * @copydoc Tensor::argmax()
+   */
+  std::vector<unsigned int> argmax() const override;
+
+  /**
+   * @copydoc Tensor::max_abs()
+   */
+  float max_abs() const override;
+
+  /**
+   * @copydoc Tensor::maxValue()
+   */
+  float maxValue() const override;
+
+  /**
+   * @copydoc Tensor::minValue()
+   */
+  float minValue() const override;
+
+  /**
+   * @copydoc Tensor::print(std::ostream &out)
+   */
+  void print(std::ostream &out) const override;
+
+private:
+  /**
+   * @brief copy a buffer to @a this, the caller has to ensure that @a this is
+   * initialized, otherwise the behavior is undefined
+   *
+   * @param buf buffer to copy from
+   */
+  void copy(const void *buf);
+
+  /**
+   * @brief  Get the Data Type String object
+   * @return std::string of tensor data type (UINT16)
+   */
+  std::string getStringDataType() const override { return "UINT16"; }
+};
+
+} // namespace nntrainer
+
+#endif /* __cplusplus */
+#endif /* __SHORT_TENSOR_H__ */
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp

index 8ffe57929d310417d9d369eb791ea9c9c01af0c8..5c38fcf093c7c162219fbead3441cea17dbcad39 100644 (file)
--- a/nntrainer/tensor/tensor.cpp
+++ b/nntrainer/tensor/tensor.cpp
@@ -12,6 +12,7 @@
  #include <char_tensor.h>
  #include <float_tensor.h>
  #include <lazy_tensor.h>
+#include <short_tensor.h>
  #include <tensor.h>
  
  #ifdef ENABLE_FP16
@@ -32,6 +33,9 @@ Tensor::Tensor(std::string name_, Tformat fm, Tdatatype d_type) {
  #else
      throw std::invalid_argument("Error: enable-fp16 is not enabled");
  #endif
+  } else if (d_type == Tdatatype::UINT16) {
+    itensor = std::shared_ptr<ShortTensor>(new ShortTensor(name_, fm),
+                                           std::default_delete<ShortTensor>());
    } else if (d_type == Tdatatype::QINT8) {
      itensor = std::shared_ptr<CharTensor>(new CharTensor(name_, fm),
                                            std::default_delete<CharTensor>());
@@ -59,6 +63,10 @@ Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init,
  #else
      throw std::invalid_argument("Error: enable-fp16 is not enabled");
  #endif
+  } else if (d.getDataType() == Tdatatype::UINT16) {
+    itensor =
+      std::shared_ptr<ShortTensor>(new ShortTensor(d, alloc_now, init, name),
+                                   std::default_delete<ShortTensor>());
    } else if (d.getDataType() == Tdatatype::QINT8) {
      itensor =
        std::shared_ptr<CharTensor>(new CharTensor(d, alloc_now, init, name),
@@ -84,6 +92,9 @@ Tensor::Tensor(const TensorDim &d, const void *buf) {
  #else
      throw std::invalid_argument("Error: enable-fp16 is not enabled");
  #endif
+  } else if (d.getDataType() == Tdatatype::UINT16) {
+    itensor = std::shared_ptr<ShortTensor>(new ShortTensor(d, buf),
+                                           std::default_delete<ShortTensor>());
    } else if (d.getDataType() == Tdatatype::QINT8) {
      itensor = std::shared_ptr<CharTensor>(new CharTensor(d, buf),
                                            std::default_delete<CharTensor>());
@@ -106,6 +117,9 @@ Tensor::Tensor(const Tensor &rhs) {
  #else
      throw std::invalid_argument("Error: enable-fp16 is not enabled");
  #endif
+  } else if (rhs.getDataType() == Tdatatype::UINT16) {
+    itensor = std::shared_ptr<ShortTensor>(new ShortTensor(*rhs.itensor),
+                                           std::default_delete<ShortTensor>());
    } else if (rhs.getDataType() == Tdatatype::QINT8) {
      itensor = std::shared_ptr<CharTensor>(new CharTensor(*rhs.itensor),
                                            std::default_delete<CharTensor>());
@@ -123,6 +137,9 @@ Tensor &Tensor::operator=(const Tensor &rhs) {
  #else
      throw std::invalid_argument("Error: enable-fp16 is not enabled");
  #endif
+  } else if (rhs.getDataType() == Tdatatype::UINT16) {
+    itensor = std::shared_ptr<ShortTensor>(new ShortTensor(*rhs.itensor),
+                                           std::default_delete<ShortTensor>());
    } else if (rhs.getDataType() == Tdatatype::QINT8) {
      itensor = std::shared_ptr<CharTensor>(new CharTensor(*rhs.itensor),
                                            std::default_delete<CharTensor>());
@@ -146,6 +163,9 @@ bool Tensor::operator==(const Tensor &rhs) const {
          "Error: HalfTensor cannot be created or used when FP16 is not enabled. "
          "Please check if the tensor data type is set properly.");
  #endif
+    } else if (getDataType() == Tdatatype::UINT16) {
+      return *std::dynamic_pointer_cast<ShortTensor>(itensor) ==
+             *std::dynamic_pointer_cast<ShortTensor>(rhs.itensor);
      } else if (getDataType() == Tdatatype::QINT8) {
        return *std::dynamic_pointer_cast<CharTensor>(itensor) ==
               *std::dynamic_pointer_cast<CharTensor>(rhs.itensor);
diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h

index 472d694f4b61675875be2af58a1adddfd7a3aa03..74e0a3437e43216f3a4510a84782c7fc01ce32d1 100644 (file)
--- a/nntrainer/tensor/tensor.h
+++ b/nntrainer/tensor/tensor.h
@@ -27,6 +27,7 @@
  #include <char_tensor.h>
  #include <float_tensor.h>
  #include <nntrainer_log.h>
+#include <short_tensor.h>
  #include <tensor_base.h>
  
  #ifdef ENABLE_FP16
@@ -230,6 +231,37 @@ public:
      Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
  #endif
  
+  /**
+   * @brief     Constructor of Tensor backed by a ShortTensor
+   * @param[in] d data for the Tensor. It needs to set format properly.
+   * @param[in] t_type Tensor Type
+   * @note      Only t_type.format is forwarded to ShortTensor; the data type
+   *            in t_type is not consulted by this constructor.
+   */
+  Tensor(std::vector<std::vector<std::vector<std::vector<uint16_t>>>> const &d,
+         ml::train::TensorDim::TensorType t_type) {
+    itensor = std::shared_ptr<ShortTensor>(new ShortTensor(d, t_type.format),
+                                           std::default_delete<ShortTensor>());
+  }
+
+  /**
+   * @brief     Constructor of Tensor from three-level nested uint16_t data
+   * @note      This constructor wraps d in a single-element outer vector
+   *            and delegates, so the vector is copied again. needs
+   *            refactoring
+   * @param[in] d data for the Tensor. It needs to set format properly.
+   * @param[in] t_type Tensor Type
+   */
+  Tensor(std::vector<std::vector<std::vector<uint16_t>>> const &d,
+         ml::train::TensorDim::TensorType t_type) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+
+  /**
+   * @brief     Constructor of Tensor from two-level nested uint16_t data
+   * @note      This constructor wraps d in a single-element outer vector
+   *            and delegates, so the vector is copied again. needs
+   *            refactoring
+   * @param[in] d data for the Tensor with batch size one
+   * @param[in] t_type Tensor Type
+   */
+  Tensor(std::vector<std::vector<uint16_t>> const &d,
+         ml::train::TensorDim::TensorType t_type) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+
    /**
     * @brief     Constructor of Tensor
     * @param[in] d data for the Tensor. It needs to set format properly.
diff --git a/nntrainer/tensor/tensor_dim.cpp b/nntrainer/tensor/tensor_dim.cpp

index 6bf1c2789013eb1bfa848a12c9dbe5a1ca5150d1..753e64f7606c199bd688dbea15045e623071fd9f 100644 (file)
--- a/nntrainer/tensor/tensor_dim.cpp
+++ b/nntrainer/tensor/tensor_dim.cpp
@@ -33,9 +33,7 @@ TensorDim::TensorDim(TensorDim::Format fm, TensorDim::DataType d_type,
  TensorDim::TensorDim(TensorType t_type_,
                       const std::bitset<MAXDIM> &eff_dim_flag_,
                       const std::bitset<MAXDIM> &dyn_dim_flag_) :
-  t_type(t_type_),
-  eff_dim_flag(eff_dim_flag_),
-  dyn_dim_flag(dyn_dim_flag_) {
+  t_type(t_type_), eff_dim_flag(eff_dim_flag_), dyn_dim_flag(dyn_dim_flag_) {
    for (size_t i = 0; i < MAXDIM; ++i) {
      dim[i] = 0;
    }
@@ -157,6 +155,8 @@ uint TensorDim::getDataTypeSize() const {
  #endif
    case TensorDim::DataType::FP32:
      return sizeof(float);
+  case TensorDim::DataType::UINT16:
+    return sizeof(uint16_t);
    case TensorDim::DataType::QINT8:
      return sizeof(int8_t);
    case TensorDim::DataType::QINT4:
@@ -378,6 +378,8 @@ std::ostream &operator<<(std::ostream &out, TensorDim const &d) {
      type_ = "FP32";
    } else if (d.getDataType() == ml::train::TensorDim::DataType::FP16) {
      type_ = "FP16";
+  } else if (d.getDataType() == ml::train::TensorDim::DataType::UINT16) {
+    type_ = "UINT16";
    } else if (d.getDataType() == ml::train::TensorDim::DataType::QINT8) {
      type_ = "QINT8";
    } else if (d.getDataType() == ml::train::TensorDim::DataType::QINT4) {
diff --git a/packaging/nntrainer.spec b/packaging/nntrainer.spec

index 2266ca398766f63a8048ffbc6e59179a9e5f12d2..80d682c908d7e1b10f77dcd6e7e3e8efdea7c682 100644 (file)
--- a/packaging/nntrainer.spec
+++ b/packaging/nntrainer.spec
@@ -529,6 +529,7 @@ cp -r result %{buildroot}%{_datadir}/nntrainer/unittest/
  %{_includedir}/nntrainer/tensor.h
  %{_includedir}/nntrainer/tensor_base.h
  %{_includedir}/nntrainer/char_tensor.h
+%{_includedir}/nntrainer/short_tensor.h
  %{_includedir}/nntrainer/float_tensor.h
  %if 0%{?enable_fp16}
  %{_includedir}/nntrainer/half_tensor.h
diff --git a/test/unittest/unittest_nntrainer_tensor.cpp b/test/unittest/unittest_nntrainer_tensor.cpp

index 3a16187468512c29145ab4252b4b28078899a1d7..0d6d47be3771a49f131841581d612233048ceed9 100644 (file)
--- a/test/unittest/unittest_nntrainer_tensor.cpp
+++ b/test/unittest/unittest_nntrainer_tensor.cpp
@@ -227,28 +227,28 @@ TEST(nntrainer_Tensor, Tensor_04_p) {
    EXPECT_EQ(status, ML_ERROR_NONE);
  }
  
-// TEST(nntrainer_Tensor, Tensor_05_p) {
-//   int status = ML_ERROR_NONE;
-//   std::vector<std::vector<std::vector<uint8_t>>> in = {{{0, 1}, {2, 3}},
-//                                                        {{4, 5}, {6, 7}},
-//                                                        {{8, 9}, {10, 11}},
-//                                                        {{12, 13}, {14, 15}}};
-
-//   nntrainer::Tensor tensor = nntrainer::Tensor(
-//     in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4});
-//   ASSERT_NE(nullptr, tensor.getData<uint8_t>());
+TEST(nntrainer_Tensor, Tensor_05_p) {
+  int status = ML_ERROR_NONE;
+  std::vector<std::vector<std::vector<uint16_t>>> in = {{{0, 1}, {2, 3}},
+                                                        {{4, 5}, {6, 7}},
+                                                        {{8, 9}, {10, 11}},
+                                                        {{12, 13}, {14, 15}}};
  
-//   for (size_t b = 0; b < tensor.batch(); ++b) {
-//     for (size_t c = 0; c < tensor.channel(); ++c) {
-//       for (size_t h = 0; h < tensor.height(); ++h) {
-//         for (size_t w = 0; w < tensor.width(); ++w) {
-//           size_t idx = tensor.getIndex(b, c, h, w);
-//           ASSERT_EQ(idx, tensor.getValueQint4(idx));
-//         }
-//       }
-//     }
-//   }
-// }
+  nntrainer::Tensor tensor = nntrainer::Tensor(
+    in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::UINT16});
+  ASSERT_NE(nullptr, tensor.getData<uint16_t>());
+
+  for (size_t b = 0; b < tensor.batch(); ++b) {
+    for (size_t c = 0; c < tensor.channel(); ++c) {
+      for (size_t h = 0; h < tensor.height(); ++h) {
+        for (size_t w = 0; w < tensor.width(); ++w) {
+          size_t idx = tensor.getIndex(b, c, h, w);
+          ASSERT_EQ(idx, tensor.getValue<uint16_t>(idx));
+        }
+      }
+    }
+  }
+}
  
  // TEST(nntrainer_Tensor, Tensor_06_p) {
  //   int status = ML_ERROR_NONE;
@@ -3131,7 +3131,7 @@ TEST(nntrainer_Tensor, save_read_01_n) {
    ASSERT_EQ(status, 0);
  }
  
-TEST(nntrainer_Tensor, copy_and_shares_variable_p) {
+TEST(nntrainer_Tensor, copy_and_shares_variable_01_p) {
    nntrainer::Tensor A = constant(1.0f, 3, 4, 5, 6);
    nntrainer::Tensor B = A.clone();
    nntrainer::Tensor C = A;
@@ -3146,6 +3146,23 @@ TEST(nntrainer_Tensor, copy_and_shares_variable_p) {
    EXPECT_NE(A.getDim(), C.getDim());
  }
  
+/**
+ * @brief UINT16 variant of the copy/clone test: checks how a copy-constructed
+ *        tensor and a clone() relate to the original after a write and a
+ *        reshape
+ */
+TEST(nntrainer_Tensor, copy_and_shares_variable_02_p) {
+  nntrainer::Tensor A = constant(10, 3, 4, 5, 6, nntrainer::Tformat::NCHW,
+                                 nntrainer::Tdatatype::UINT16);
+  nntrainer::Tensor B = A.clone();
+  nntrainer::Tensor C = A;
+
+  // write through C only
+  C.setValue(1, 1, 1, 1, 9);
+
+  // A is expected to still compare equal to C, the clone B is not
+  EXPECT_EQ(A, C);
+  EXPECT_NE(B, C);
+
+  // reshaping C is expected to leave the dimension of A untouched
+  C.reshape(nntrainer::TensorDim(3, 4, 6, 5, nntrainer::Tformat::NCHW,
+                                 nntrainer::Tdatatype::UINT16));
+  EXPECT_EQ(A.getDim(), B.getDim());
+  EXPECT_NE(A.getDim(), C.getDim());
+}
+
  TEST(nntrainer_Tensor, reshape_n_01) {
    nntrainer::Tensor A = constant(1.0f, 3, 4, 5, 6);
  
@@ -3352,15 +3369,15 @@ TEST(nntrainer_Tensor, allocate_04_p) {
    EXPECT_TRUE(t.isAllocated());
  }
  
-// TEST(nntrainer_Tensor, allocate_05_p) {
-//   nntrainer::Tensor t(
-//     {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}},
-//     true);
-//   EXPECT_TRUE(t.isAllocated());
+TEST(nntrainer_Tensor, allocate_05_p) {
+  nntrainer::Tensor t(
+    {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::UINT16}},
+    true);
+  EXPECT_TRUE(t.isAllocated());
  
-//   t.allocate();
-//   EXPECT_TRUE(t.isAllocated());
-// }
+  t.allocate();
+  EXPECT_TRUE(t.isAllocated());
+}
  
  TEST(nntrainer_Tensor, initialize_01_p) {
    nntrainer::Tensor t({1, 2, 3, 4}, true, nntrainer::Initializer::ONES);
@@ -3479,6 +3496,28 @@ TEST(nntrainer_Tensor, initialize_09_p) {
    EXPECT_EQ(golden, t);
  }
  
+/**
+ * @brief UINT16 tensors built with ONES and ZEROS must differ until the
+ *        second one is re-initialized with ONES
+ */
+TEST(nntrainer_Tensor, initialize_10_p) {
+  nntrainer::Tensor t(
+    {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::UINT16}},
+    true, nntrainer::Initializer::ONES);
+  nntrainer::Tensor golden(
+    {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::UINT16}},
+    true, nntrainer::Initializer::ZEROS);
+  EXPECT_NE(golden, t);
+  // after switching golden to ONES both tensors are expected to match
+  golden.initialize(nntrainer::Initializer::ONES);
+  EXPECT_EQ(golden, t);
+}
+
+/**
+ * @brief Negative test: initializing a UINT16 tensor with HE_NORMAL is
+ *        expected to throw std::invalid_argument
+ */
+TEST(nntrainer_Tensor, initialize_11_n) {
+  nntrainer::Tensor t(
+    {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::UINT16}},
+    true);
+
+  /// @note ShortTensor does not support HE_NORMAL initialization
+  EXPECT_THROW(t.initialize(nntrainer::Initializer::HE_NORMAL),
+               std::invalid_argument);
+}
+
  TEST(nntrainer_Tensor, split_01_p) {
    {
      nntrainer::TensorDim ref_dim(3, 2, 4, 5);
author	Donghyeon Jeong <dhyeon.jeong@samsung.com>
	Mon, 5 Aug 2024 10:49:22 +0000 (19:49 +0900)
committer	MyungJoo Ham <myungjoo.ham@samsung.com>
	Tue, 27 Aug 2024 04:38:46 +0000 (13:38 +0900)
api/ccapi/include/tensor_dim.h		patch \| blob \| history
debian/nntrainer-dev.install		patch \| blob \| history
nntrainer/tensor/meson.build		patch \| blob \| history
nntrainer/tensor/short_tensor.cpp	[new file with mode: 0644]	patch \| blob
nntrainer/tensor/short_tensor.h	[new file with mode: 0644]	patch \| blob
nntrainer/tensor/tensor.cpp		patch \| blob \| history
nntrainer/tensor/tensor.h		patch \| blob \| history
nntrainer/tensor/tensor_dim.cpp		patch \| blob \| history
packaging/nntrainer.spec		patch \| blob \| history
test/unittest/unittest_nntrainer_tensor.cpp		patch \| blob \| history