From: Donghyeon Jeong Date: Mon, 5 Aug 2024 10:49:22 +0000 (+0900) Subject: [Tensor] ShortTensor class with unsigned 16-bit integer X-Git-Tag: accepted/tizen/7.0/unified/20240830.164841~10 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=cc5ca33f9cbc6824c69796df1ddf3ecd8a0d563c;p=platform%2Fcore%2Fml%2Fnntrainer.git [Tensor] ShortTensor class with unsigned 16-bit integer In this PR, a new type of tensor, the ShortTensor class, is designed explicitly for handling unsigned 16-bit integer data types. This new tensor class aims to provide users with more options when working with various data types. Note that the ShortTensor class does not support mathematical operations like multiplication or addition. **Self-evaluation:** 1. Build test: [X]Passed [ ]Failed [ ]Skipped 2. Run test: [X]Passed [ ]Failed [ ]Skipped Signed-off-by: Donghyeon Jeong --- diff --git a/api/ccapi/include/tensor_dim.h b/api/ccapi/include/tensor_dim.h index 64523618..73245819 100644 --- a/api/ccapi/include/tensor_dim.h +++ b/api/ccapi/include/tensor_dim.h @@ -48,14 +48,15 @@ public: enum class Format { NCHW, NHWC }; /** - * @brief Tensor Data Type. Currently QINT4, QINT8, FP16 & FP32 Support - * + * @brief Tensor Data Type. 
+ * Currently support QINT4, QINT8, UINT16, FP16 & FP32 */ enum class DataType { - QINT4, /** quantized int 4*/ - QINT8, /** quantized int 8*/ - FP16, /** half precision */ - FP32 /** single precision */ + QINT4, /** quantized int 4*/ + QINT8, /** quantized int 8*/ + UINT16, /** unsigned int 16 bit */ + FP16, /** half precision */ + FP32 /** single precision */ }; /** @@ -97,9 +98,7 @@ public: */ TensorType(Format fm, DataType d_type, StorageOrder order = StorageOrder::ROW_MAJOR) : - format(fm), - data_type(d_type), - storage_order(order){}; + format(fm), data_type(d_type), storage_order(order){}; }; /** @@ -113,7 +112,7 @@ public: * @brief Creator of TensorDim with Format & DataType * * @param fm format NCHW | HNWC - * @param d_type DataType QINT4 | QINT8 | FP16 | FP32 + * @param d_type DataType QINT4 | QINT8 | UINT16 | FP16 | FP32 * @param eff_dim_flag_ effective dimension flag (1 means it's effective) * @param dyn_dim_flag_ dynamic dimension flag (1 means it's unspecified) */ @@ -216,7 +215,7 @@ public: * @param h height * @param w width * @param fm format NCHW | HNWC - * @param d_type Data Type QINT4 | QINT8 | FP16 | FP32 + * @param d_type Data Type QINT4 | QINT8 | UINT16 | FP16 | FP32 * @param eff_dim_flag_ dimension bit flag to calculate the dynamic * dimension, rightmost is width */ @@ -245,7 +244,7 @@ public: * * @param shape shape of format * @param fm format NCHW | HNWC - * @param d_type data type QINT4 | QINT8 | FP16 | FP32 + * @param d_type data type QINT4 | QINT8 | UINT16 | FP16 | FP32 * @param order data storage order ROW_MAJOR | COL_MAJOR */ TensorDim(const std::string &shape, TensorDim::Format fm, diff --git a/debian/nntrainer-dev.install b/debian/nntrainer-dev.install index 9517b687..6390d02b 100644 --- a/debian/nntrainer-dev.install +++ b/debian/nntrainer-dev.install @@ -11,6 +11,7 @@ /usr/include/nntrainer/tensor.h /usr/include/nntrainer/tensor_base.h /usr/include/nntrainer/char_tensor.h +/usr/include/nntrainer/short_tensor.h 
/usr/include/nntrainer/float_tensor.h /usr/include/nntrainer/tensor_wrap_specs.h /usr/include/nntrainer/blas_interface.h diff --git a/nntrainer/tensor/meson.build b/nntrainer/tensor/meson.build index 1e69ad90..19c32096 100644 --- a/nntrainer/tensor/meson.build +++ b/nntrainer/tensor/meson.build @@ -9,6 +9,7 @@ tensor_sources = [ 'tensor_base.cpp', 'float_tensor.cpp', 'char_tensor.cpp', + 'short_tensor.cpp', 'tensor_dim.cpp', 'var_grad.cpp', 'weight.cpp', @@ -28,6 +29,7 @@ tensor_headers = [ 'tensor_base.h', 'float_tensor.h', 'char_tensor.h', + 'short_tensor.h', 'weight.h', 'var_grad.h', 'tensor_wrap_specs.h', diff --git a/nntrainer/tensor/short_tensor.cpp b/nntrainer/tensor/short_tensor.cpp new file mode 100644 index 00000000..8705b10e --- /dev/null +++ b/nntrainer/tensor/short_tensor.cpp @@ -0,0 +1,365 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * @file short_tensor.cpp + * @date 02 April 2024 + * @brief This is ShortTensor class for 16-bit unsigned integer calculation + * @see https://github.com/nnstreamer/nntrainer + * @author Donghyeon Jeong + * @bug No known bugs except for NYI items + */ + +#include +#include + +#include +#include +#include + +namespace nntrainer { + +ShortTensor::ShortTensor(std::string name_, Tformat fm) : + TensorBase(name_, fm, Tdatatype::UINT16) {} + +ShortTensor::ShortTensor(const TensorDim &d, bool alloc_now, Initializer init, + std::string name) : + TensorBase(d, alloc_now, init, name) { + if (alloc_now) + allocate(); +} + +ShortTensor::ShortTensor(const TensorDim &d, const void *buf) : + ShortTensor(d, true) { + if (d.getDataLen() != 0) { + if (buf != nullptr) + copy(buf); + } +} + +ShortTensor::ShortTensor( + std::vector>>> const &d, + Tformat fm) { + if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) { + throw std::out_of_range( + "[Tensor] trying to initialize ShortTensor from empty vector"); + } + + dim.setTensorDim(0, d.size()); + if (fm == Tformat::NCHW) { + dim.setTensorDim(1, d[0].size()); + 
dim.setTensorDim(2, d[0][0].size());
+    dim.setTensorDim(3, d[0][0][0].size());
+  } else {
+    dim.setTensorDim(2, d[0].size());
+    dim.setTensorDim(3, d[0][0].size());
+    dim.setTensorDim(1, d[0][0][0].size());
+  }
+
+  dim.setTensorType({fm, Tdatatype::UINT16});
+
+  strides = dim.computeStrides();
+  contiguous = true;
+  initializer = Initializer::NONE;
+
+  MemoryData *mem_data =
+    new MemoryData((void *)(new uint16_t[dim.getDataLen()]()));
+  data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
+    delete[] mem_data->getAddr<uint16_t>();
+    delete mem_data;
+  });
+
+  offset = 0;
+
+  // if fm == Tformat::NCHW, then dim[0] == batch , dim[1] == channel, dim[2]
+  // == height, dim[3] == width. and if fm == Tformat::NHWC, dim[0] == batch,
+  // dim[1] == height, dim[2] == width, dim[3] == channel
+  if (fm == Tformat::NCHW) {
+    for (unsigned int i = 0; i < batch(); ++i)
+      for (unsigned int j = 0; j < channel(); ++j)
+        for (unsigned int k = 0; k < height(); ++k)
+          for (unsigned int l = 0; l < width(); ++l)
+            this->setValue(i, j, k, l, d[i][j][k][l]);
+  } else {
+    for (unsigned int i = 0; i < batch(); ++i)
+      for (unsigned int j = 0; j < height(); ++j)
+        for (unsigned int k = 0; k < width(); ++k)
+          for (unsigned int l = 0; l < channel(); ++l)
+            this->setValue(i, l, j, k, d[i][j][k][l]);
+  }
+}
+
+bool ShortTensor::operator==(const ShortTensor &rhs) const {
+  const uint16_t *_data = (uint16_t *)getData();
+  const uint16_t *_rdata = (uint16_t *)rhs.getData();
+  for (size_t i = 0; i < size(); ++i) {
+    if (_data[i] != _rdata[i])
+      return false;
+  }
+
+  return true;
+}
+
+void ShortTensor::allocate() {
+  if (empty() || data)
+    return;
+
+  if (src_tensor) {
+    /// allocate data based on the source tensor
+    allocateSrcTensor();
+    /** as this memory is shared, do NOT initialize */
+  } else {
+    /// allocate new memory for the tensor data
+    MemoryData *mem_data;
+
+    mem_data = new MemoryData((void *)(new uint16_t[dim.getDataLen()]{}));
+    data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
+      delete[] 
mem_data->template getAddr(); + delete mem_data; + }); + + offset = 0; + initialize(); + } +} + +void ShortTensor::deallocate() { + data = nullptr; + offset = 0; +} + +void *ShortTensor::getData() const { + if (!data) + return nullptr; + + data->validate(); + return data->getAddr() + offset; +} + +void *ShortTensor::getData(size_t idx) const { + if (!data) + return nullptr; + + data->validate(); + return data->getAddr() + offset + idx; +} + +void *ShortTensor::getAddress(unsigned int i) { + size_t index = getIndex(batch(), channel(), height(), width()); + if (i > index) { + return nullptr; + } + return &((uint16_t *)getData())[i]; +} + +const void *ShortTensor::getAddress(unsigned int i) const { + size_t index = getIndex(batch(), channel(), height(), width()); + if (i > index) { + return nullptr; + } + return &((uint16_t *)getData())[i]; +} + +const uint16_t &ShortTensor::getValue(unsigned int i) const { + return ((uint16_t *)getData())[i]; +} + +uint16_t &ShortTensor::getValue(unsigned int i) { + return ((uint16_t *)getData())[i]; +} + +const uint16_t &ShortTensor::getValue(unsigned int b, unsigned int c, + unsigned int h, unsigned int w) const { + return getValue(getIndex(b, c, h, w)); +} + +uint16_t &ShortTensor::getValue(unsigned int b, unsigned int c, unsigned int h, + unsigned int w) { + return getValue(getIndex(b, c, h, w)); +} + +void ShortTensor::setValue(float value) { + uint16_t *data = (uint16_t *)getData(); + std::fill(data, data + size(), value); +} + +void ShortTensor::addValue(unsigned int b, unsigned int c, unsigned int h, + unsigned int w, float value, float beta) { + auto const &idx = getIndex(b, c, h, w); + float output = ((uint16_t *)getData())[idx]; + output *= beta; + output += value; + + ((uint16_t *)getData())[idx] = std::trunc(output); +} + +void ShortTensor::setValue(unsigned int b, unsigned int c, unsigned int h, + unsigned int w, float value) { + ((uint16_t *)getData())[getIndex(b, c, h, w)] = (uint16_t)value; +} + +void 
ShortTensor::setZero() {
+  /// @todo replace with apply_i or scal
+  setValue(0);
+}
+
+void ShortTensor::initialize() {
+  if (empty() || !isAllocated())
+    return;
+
+  /// @note Sampling from the normal/uniform distribution is invalid
+  switch (initializer) {
+  case Initializer::ZEROS:
+    setZero();
+    break;
+  case Initializer::ONES:
+    setValue(1.0f);
+    break;
+  case Initializer::NONE:
+    break;
+  default:
+    throw std::invalid_argument("Initializer not valid for " +
+                                getStringDataType());
+    break;
+  }
+
+  putData();
+}
+
+void ShortTensor::initialize(Initializer init) {
+  initializer = init;
+  initialize();
+}
+
+void ShortTensor::copy(const Tensor &from) {
+  reshape(from.getDim());
+  copy(from.getData<uint16_t>());
+}
+
+void ShortTensor::copyData(const Tensor &from) {
+  NNTR_THROW_IF(!contiguous, std::invalid_argument)
+    << getName() << " is not contiguous, cannot copy.";
+
+  NNTR_THROW_IF(size() != from.size(), std::invalid_argument)
+    << "Size of tensor to copy must match";
+
+  /// @todo support copy from other data types
+  switch (from.getDataType()) {
+  case ml::train::TensorDim::DataType::UINT16:
+    copy(from.getData<uint16_t>());
+    break;
+  default:
+    throw std::invalid_argument("Error: Unsupported data type");
+    break;
+  }
+}
+
+void ShortTensor::copy_with_stride(const Tensor &input, Tensor &output) {
+  for (unsigned int b = 0; b < output.batch(); ++b) {
+    for (unsigned int c = 0; c < output.channel(); ++c) {
+      for (unsigned int h = 0; h < output.height(); ++h) {
+        for (unsigned int w = 0; w < output.width(); ++w) {
+          output.setValue(b, c, h, w, input.getValue<uint16_t>(b, c, h, w));
+        }
+      }
+    }
+  }
+}
+
+std::vector<unsigned int> ShortTensor::argmax() const {
+  std::vector<unsigned int> result;
+  const uint16_t *data = (uint16_t *)getData();
+  size_t batch_size = batch();
+  size_t feature_len = dim.getFeatureLen();
+
+  result.resize(batch_size);
+
+  for (unsigned int b = 0; b < batch_size; b++) {
+    auto max_iter =
+      std::max_element(data + b * feature_len, data + (b + 1) * feature_len);
+    result[b] = std::distance(data, 
max_iter) - (b * feature_len);
+  }
+  return result;
+}
+
+float ShortTensor::max_abs() const {
+  const uint16_t *data = (uint16_t *)getData();
+
+  // data is unsigned, so the absolute value is the value itself
+  uint16_t max_val = data[0];
+  for (unsigned int i = 1; i < size(); i += 1) {
+    if (data[i] > max_val) {
+      max_val = data[i];
+    }
+  }
+
+  return max_val;
+}
+
+float ShortTensor::maxValue() const {
+  const uint16_t *data = (uint16_t *)getData();
+  return *std::max_element(data, data + size());
+}
+
+float ShortTensor::minValue() const {
+  const uint16_t *data = (uint16_t *)getData();
+  return *std::min_element(data, data + size());
+}
+
+void ShortTensor::print(std::ostream &out) const {
+  const uint16_t *data = (uint16_t *)getData();
+  unsigned int len = size();
+  out << "data addr: " << reinterpret_cast<const void *>(data) << '\n';
+  out << dim;
+
+  if (len > 512) {
+    out << '[' << (int)data[0] << ' ' << (int)data[1] << ' ' << (int)data[2]
+        << " ... " << (int)data[len - 3] << ' ' << (int)data[len - 2] << ' '
+        << (int)data[len - 1] << ']' << std::endl;
+    return;
+  }
+
+  std::ios init(NULL);
+  init.copyfmt(out);
+  if (getFormat() == Tformat::NCHW) {
+    for (unsigned int k = 0; k < batch(); k++) {
+      for (unsigned int l = 0; l < channel(); l++) {
+        for (unsigned int i = 0; i < height(); i++) {
+          for (unsigned int j = 0; j < width(); j++) {
+            out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
+          }
+          out << std::endl;
+        }
+        out << std::endl;
+      }
+      out << "-------" << std::endl;
+    }
+  } else {
+    for (unsigned int k = 0; k < batch(); k++) {
+      for (unsigned int i = 0; i < height(); i++) {
+        for (unsigned int j = 0; j < width(); j++) {
+          for (unsigned int l = 0; l < channel(); l++) {
+            out << std::setw(10) << (int)this->getValue(k, l, i, j) << " ";
+          }
+          out << std::endl;
+        }
+        out << std::endl;
+      }
+      out << "-------" << std::endl;
+    }
+  }
+  // restore saved formatting state on both NCHW and NHWC paths
+  out.copyfmt(init);
+}
+
+void ShortTensor::copy(const void *buf) {
+  NNTR_THROW_IF(!contiguous, 
std::invalid_argument) + << getName() << " is not contiguous, cannot copy."; + + if (buf == getData()) { + return; + } + + /// @todo need to optimize + for (unsigned int i = 0; i < size(); ++i) { + ((uint16_t *)getData())[i] = ((uint16_t *)buf)[i]; + } +} + +} // namespace nntrainer diff --git a/nntrainer/tensor/short_tensor.h b/nntrainer/tensor/short_tensor.h new file mode 100644 index 00000000..c27e68b1 --- /dev/null +++ b/nntrainer/tensor/short_tensor.h @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * @file short_tensor.h + * @date 02 April 2024 + * @brief This is ShortTensor class for 16-bit unsigned integer calculation + * @see https://github.com/nnstreamer/nntrainer + * @author Donghyeon Jeong + * @bug No known bugs except for NYI items + */ + +#ifndef __SHORT_TENSOR_H__ +#define __SHORT_TENSOR_H__ +#ifdef __cplusplus + +#include + +namespace nntrainer { + +/** + * @class ShortTensor class + * @brief ShortTensor class for 16-bit unsigned integer calculation + */ +class ShortTensor : public TensorBase { +public: + /** + * @brief Basic Constructor of Tensor + */ + ShortTensor(std::string name_ = "", Tformat fm = Tformat::NCHW); + + /** + * @brief Construct a new ShortTensor object + * + * @param d Tensor dim for this float tensor + * @param alloc_now Allocate memory to this tensor or not + * @param init Initializer for the tensor + * @param name Name of the tensor + */ + ShortTensor(const TensorDim &d, bool alloc_now, + Initializer init = Initializer::NONE, std::string name = ""); + + /** + * @brief Construct a new ShortTensor object + * + * @param d Tensor dim for this tensor + * @param buf buffer + */ + ShortTensor(const TensorDim &d, const void *buf = nullptr); + + /** + * @brief Construct a new ShortTensor object + * + * @param d data for the Tensor + * @param fm format for the Tensor + */ + ShortTensor( + std::vector>>> const &d, + Tformat fm); + + /** + * @brief Construct a new ShortTensor object + * @param rhs TensorBase object to copy 
+ */ + ShortTensor(TensorBase &rhs) : TensorBase(rhs) {} + + /** + * @brief Basic Destructor + */ + ~ShortTensor() {} + + /** + * @brief Comparison operator overload + * @param[in] rhs Tensor to be compared with + * @note Only compares Tensor data + */ + bool operator==(const ShortTensor &rhs) const; + + /** + * @brief Comparison operator overload + * @param[in] rhs Tensor to be compared with + * @note Only compares Tensor data + */ + bool operator!=(const ShortTensor &rhs) const { return !(*this == rhs); } + + /** + * @copydoc Tensor::allocate() + */ + void allocate() override; + + /** + * @copydoc Tensor::deallocate() + */ + void deallocate() override; + + /** + * @copydoc Tensor::getData() + */ + void *getData() const override; + + /** + * @copydoc Tensor::getData(size_t idx) + */ + void *getData(size_t idx) const override; + + /** + * @brief i data index + * @retval address of ith data + */ + void *getAddress(unsigned int i) override; + + /** + * @brief i data index + * @retval address of ith data + */ + const void *getAddress(unsigned int i) const override; + + /** + * @brief return value at specific location + * @param[in] i index + */ + const uint16_t &getValue(unsigned int i) const; + + /** + * @brief return value at specific location + * @param[in] i index + */ + uint16_t &getValue(unsigned int i); + + /** + * @brief return value at specific location + * @param[in] b batch location + * @param[in] c channel location + * @param[in] h height location + * @param[in] w width location + */ + const uint16_t &getValue(unsigned int b, unsigned int c, unsigned int h, + unsigned int w) const; + + /** + * @brief return value at specific location + * @param[in] b batch location + * @param[in] c channel location + * @param[in] h height location + * @param[in] w width location + */ + uint16_t &getValue(unsigned int b, unsigned int c, unsigned int h, + unsigned int w); + + /** + * @copydoc Tensor::setValue(float value) + */ + void setValue(float value) override; + + /** + 
* @copydoc Tensor::setValue(b, c, h, w, value) + */ + void setValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, + float value) override; + + /** + * @copydoc Tensor::addValue(b, c, h, w, value, beta) + */ + void addValue(unsigned int b, unsigned int c, unsigned int h, unsigned int w, + float value, float beta) override; + + /** + * @copydoc Tensor::setZero() + */ + void setZero() override; + + /** + * @copydoc Tensor::initialize() + */ + void initialize() override; + + /** + * @copydoc Tensor::initialize(Initializer init) + */ + void initialize(Initializer init) override; + + /** + * @copydoc Tensor::copy(const Tensor &from) + */ + void copy(const Tensor &from) override; + + /** + * @copydoc Tensor::copyData(const Tensor &from) + */ + void copyData(const Tensor &from) override; + + /** + * @copydoc Tensor::copy_with_stride() + */ + void copy_with_stride(const Tensor &input, Tensor &output) override; + + /** + * @copydoc Tensor::argmax() + */ + std::vector argmax() const override; + + /** + * @copydoc Tensor::max_abs() + */ + float max_abs() const override; + + /** + * @copydoc Tensor::maxValue() + */ + float maxValue() const override; + + /** + * @copydoc Tensor::minValue() + */ + float minValue() const override; + + /** + * @copydoc Tensor::print(std::ostream &out) + */ + void print(std::ostream &out) const override; + +private: + /** + * @brief copy a buffer to @a this, the caller has to ensure that @a this is + * initialized otherwise undefined behavior + * + * @param buf buffer to copy from + */ + void copy(const void *buf); + + /** + * @brief Get the Data Type String object + * @return std::string of tensor data type (UINT16) + */ + std::string getStringDataType() const override { return "UINT16"; } +}; + +} // namespace nntrainer + +#endif /* __cplusplus */ +#endif /* __SHORT_TENSOR_H__ */ diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp index 8ffe5792..5c38fcf0 100644 --- a/nntrainer/tensor/tensor.cpp +++ 
b/nntrainer/tensor/tensor.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #ifdef ENABLE_FP16 @@ -32,6 +33,9 @@ Tensor::Tensor(std::string name_, Tformat fm, Tdatatype d_type) { #else throw std::invalid_argument("Error: enable-fp16 is not enabled"); #endif + } else if (d_type == Tdatatype::UINT16) { + itensor = std::shared_ptr(new ShortTensor(name_, fm), + std::default_delete()); } else if (d_type == Tdatatype::QINT8) { itensor = std::shared_ptr(new CharTensor(name_, fm), std::default_delete()); @@ -59,6 +63,10 @@ Tensor::Tensor(const TensorDim &d, bool alloc_now, Initializer init, #else throw std::invalid_argument("Error: enable-fp16 is not enabled"); #endif + } else if (d.getDataType() == Tdatatype::UINT16) { + itensor = + std::shared_ptr(new ShortTensor(d, alloc_now, init, name), + std::default_delete()); } else if (d.getDataType() == Tdatatype::QINT8) { itensor = std::shared_ptr(new CharTensor(d, alloc_now, init, name), @@ -84,6 +92,9 @@ Tensor::Tensor(const TensorDim &d, const void *buf) { #else throw std::invalid_argument("Error: enable-fp16 is not enabled"); #endif + } else if (d.getDataType() == Tdatatype::UINT16) { + itensor = std::shared_ptr(new ShortTensor(d, buf), + std::default_delete()); } else if (d.getDataType() == Tdatatype::QINT8) { itensor = std::shared_ptr(new CharTensor(d, buf), std::default_delete()); @@ -106,6 +117,9 @@ Tensor::Tensor(const Tensor &rhs) { #else throw std::invalid_argument("Error: enable-fp16 is not enabled"); #endif + } else if (rhs.getDataType() == Tdatatype::UINT16) { + itensor = std::shared_ptr(new ShortTensor(*rhs.itensor), + std::default_delete()); } else if (rhs.getDataType() == Tdatatype::QINT8) { itensor = std::shared_ptr(new CharTensor(*rhs.itensor), std::default_delete()); @@ -123,6 +137,9 @@ Tensor &Tensor::operator=(const Tensor &rhs) { #else throw std::invalid_argument("Error: enable-fp16 is not enabled"); #endif + } else if (rhs.getDataType() == Tdatatype::UINT16) { + itensor = 
std::shared_ptr(new ShortTensor(*rhs.itensor), + std::default_delete()); } else if (rhs.getDataType() == Tdatatype::QINT8) { itensor = std::shared_ptr(new CharTensor(*rhs.itensor), std::default_delete()); @@ -146,6 +163,9 @@ bool Tensor::operator==(const Tensor &rhs) const { "Error: HalfTensor cannot be created or used when FP16 is not enabled. " "Please check if the tensor data type is set properly."); #endif + } else if (getDataType() == Tdatatype::UINT16) { + return *std::dynamic_pointer_cast(itensor) == + *std::dynamic_pointer_cast(rhs.itensor); } else if (getDataType() == Tdatatype::QINT8) { return *std::dynamic_pointer_cast(itensor) == *std::dynamic_pointer_cast(rhs.itensor); diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h index 472d694f..74e0a343 100644 --- a/nntrainer/tensor/tensor.h +++ b/nntrainer/tensor/tensor.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #ifdef ENABLE_FP16 @@ -230,6 +231,37 @@ public: Tensor(std::vector::type>{d}, t_type){}; #endif + /** + * @brief Constructor of Tensor + * @param[in] d data for the Tensor. It needs to set format properly. + * @param[in] t_type Tensor Type + */ + Tensor(std::vector>>> const &d, + ml::train::TensorDim::TensorType t_type) { + itensor = std::shared_ptr(new ShortTensor(d, t_type.format), + std::default_delete()); + } + + /** + * @brief Constructor of Tensor + * @note This constructor copies vector again. needs refactoring + * @param[in] d data for the Tensor. It needs to set format properly. + * @param[in] t_type Tensor Type + */ + Tensor(std::vector>> const &d, + ml::train::TensorDim::TensorType t_type) : + Tensor(std::vector::type>{d}, t_type){}; + + /** + * @brief Constructor of Tensor + * @note This constructor copies vector again. 
needs refactoring + * @param[in] d data for the Tensor with batch size one + * @param[in] t_type Tensor Type + */ + Tensor(std::vector> const &d, + ml::train::TensorDim::TensorType t_type) : + Tensor(std::vector::type>{d}, t_type){}; + /** * @brief Constructor of Tensor * @param[in] d data for the Tensor. It needs to set format properly. diff --git a/nntrainer/tensor/tensor_dim.cpp b/nntrainer/tensor/tensor_dim.cpp index 6bf1c278..753e64f7 100644 --- a/nntrainer/tensor/tensor_dim.cpp +++ b/nntrainer/tensor/tensor_dim.cpp @@ -33,9 +33,7 @@ TensorDim::TensorDim(TensorDim::Format fm, TensorDim::DataType d_type, TensorDim::TensorDim(TensorType t_type_, const std::bitset &eff_dim_flag_, const std::bitset &dyn_dim_flag_) : - t_type(t_type_), - eff_dim_flag(eff_dim_flag_), - dyn_dim_flag(dyn_dim_flag_) { + t_type(t_type_), eff_dim_flag(eff_dim_flag_), dyn_dim_flag(dyn_dim_flag_) { for (size_t i = 0; i < MAXDIM; ++i) { dim[i] = 0; } @@ -157,6 +155,8 @@ uint TensorDim::getDataTypeSize() const { #endif case TensorDim::DataType::FP32: return sizeof(float); + case TensorDim::DataType::UINT16: + return sizeof(uint16_t); case TensorDim::DataType::QINT8: return sizeof(int8_t); case TensorDim::DataType::QINT4: @@ -378,6 +378,8 @@ std::ostream &operator<<(std::ostream &out, TensorDim const &d) { type_ = "FP32"; } else if (d.getDataType() == ml::train::TensorDim::DataType::FP16) { type_ = "FP16"; + } else if (d.getDataType() == ml::train::TensorDim::DataType::UINT16) { + type_ = "UINT16"; } else if (d.getDataType() == ml::train::TensorDim::DataType::QINT8) { type_ = "QINT8"; } else if (d.getDataType() == ml::train::TensorDim::DataType::QINT4) { diff --git a/packaging/nntrainer.spec b/packaging/nntrainer.spec index 2266ca39..80d682c9 100644 --- a/packaging/nntrainer.spec +++ b/packaging/nntrainer.spec @@ -529,6 +529,7 @@ cp -r result %{buildroot}%{_datadir}/nntrainer/unittest/ %{_includedir}/nntrainer/tensor.h %{_includedir}/nntrainer/tensor_base.h 
%{_includedir}/nntrainer/char_tensor.h +%{_includedir}/nntrainer/short_tensor.h %{_includedir}/nntrainer/float_tensor.h %if 0%{?enable_fp16} %{_includedir}/nntrainer/half_tensor.h diff --git a/test/unittest/unittest_nntrainer_tensor.cpp b/test/unittest/unittest_nntrainer_tensor.cpp index 3a161874..0d6d47be 100644 --- a/test/unittest/unittest_nntrainer_tensor.cpp +++ b/test/unittest/unittest_nntrainer_tensor.cpp @@ -227,28 +227,28 @@ TEST(nntrainer_Tensor, Tensor_04_p) { EXPECT_EQ(status, ML_ERROR_NONE); } -// TEST(nntrainer_Tensor, Tensor_05_p) { -// int status = ML_ERROR_NONE; -// std::vector>> in = {{{0, 1}, {2, 3}}, -// {{4, 5}, {6, 7}}, -// {{8, 9}, {10, 11}}, -// {{12, 13}, {14, 15}}}; - -// nntrainer::Tensor tensor = nntrainer::Tensor( -// in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}); -// ASSERT_NE(nullptr, tensor.getData()); +TEST(nntrainer_Tensor, Tensor_05_p) { + int status = ML_ERROR_NONE; + std::vector>> in = {{{0, 1}, {2, 3}}, + {{4, 5}, {6, 7}}, + {{8, 9}, {10, 11}}, + {{12, 13}, {14, 15}}}; -// for (size_t b = 0; b < tensor.batch(); ++b) { -// for (size_t c = 0; c < tensor.channel(); ++c) { -// for (size_t h = 0; h < tensor.height(); ++h) { -// for (size_t w = 0; w < tensor.width(); ++w) { -// size_t idx = tensor.getIndex(b, c, h, w); -// ASSERT_EQ(idx, tensor.getValueQint4(idx)); -// } -// } -// } -// } -// } + nntrainer::Tensor tensor = nntrainer::Tensor( + in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::UINT16}); + ASSERT_NE(nullptr, tensor.getData()); + + for (size_t b = 0; b < tensor.batch(); ++b) { + for (size_t c = 0; c < tensor.channel(); ++c) { + for (size_t h = 0; h < tensor.height(); ++h) { + for (size_t w = 0; w < tensor.width(); ++w) { + size_t idx = tensor.getIndex(b, c, h, w); + ASSERT_EQ(idx, tensor.getValue(idx)); + } + } + } + } +} // TEST(nntrainer_Tensor, Tensor_06_p) { // int status = ML_ERROR_NONE; @@ -3131,7 +3131,7 @@ TEST(nntrainer_Tensor, save_read_01_n) { ASSERT_EQ(status, 0); } -TEST(nntrainer_Tensor, 
copy_and_shares_variable_p) { +TEST(nntrainer_Tensor, copy_and_shares_variable_01_p) { nntrainer::Tensor A = constant(1.0f, 3, 4, 5, 6); nntrainer::Tensor B = A.clone(); nntrainer::Tensor C = A; @@ -3146,6 +3146,23 @@ TEST(nntrainer_Tensor, copy_and_shares_variable_p) { EXPECT_NE(A.getDim(), C.getDim()); } +TEST(nntrainer_Tensor, copy_and_shares_variable_02_p) { + nntrainer::Tensor A = constant(10, 3, 4, 5, 6, nntrainer::Tformat::NCHW, + nntrainer::Tdatatype::UINT16); + nntrainer::Tensor B = A.clone(); + nntrainer::Tensor C = A; + + C.setValue(1, 1, 1, 1, 9); + + EXPECT_EQ(A, C); + EXPECT_NE(B, C); + + C.reshape(nntrainer::TensorDim(3, 4, 6, 5, nntrainer::Tformat::NCHW, + nntrainer::Tdatatype::UINT16)); + EXPECT_EQ(A.getDim(), B.getDim()); + EXPECT_NE(A.getDim(), C.getDim()); +} + TEST(nntrainer_Tensor, reshape_n_01) { nntrainer::Tensor A = constant(1.0f, 3, 4, 5, 6); @@ -3352,15 +3369,15 @@ TEST(nntrainer_Tensor, allocate_04_p) { EXPECT_TRUE(t.isAllocated()); } -// TEST(nntrainer_Tensor, allocate_05_p) { -// nntrainer::Tensor t( -// {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, -// true); -// EXPECT_TRUE(t.isAllocated()); +TEST(nntrainer_Tensor, allocate_05_p) { + nntrainer::Tensor t( + {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::UINT16}}, + true); + EXPECT_TRUE(t.isAllocated()); -// t.allocate(); -// EXPECT_TRUE(t.isAllocated()); -// } + t.allocate(); + EXPECT_TRUE(t.isAllocated()); +} TEST(nntrainer_Tensor, initialize_01_p) { nntrainer::Tensor t({1, 2, 3, 4}, true, nntrainer::Initializer::ONES); @@ -3479,6 +3496,28 @@ TEST(nntrainer_Tensor, initialize_09_p) { EXPECT_EQ(golden, t); } +TEST(nntrainer_Tensor, initialize_10_p) { + nntrainer::Tensor t( + {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::UINT16}}, + true, nntrainer::Initializer::ONES); + nntrainer::Tensor golden( + {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::UINT16}}, + true, nntrainer::Initializer::ZEROS); + EXPECT_NE(golden, 
t); + golden.initialize(nntrainer::Initializer::ONES); + EXPECT_EQ(golden, t); +} + +TEST(nntrainer_Tensor, initialize_11_n) { + nntrainer::Tensor t( + {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::UINT16}}, + true); + + /// @note ShortTensor does not support HE_NORMAL initialization + EXPECT_THROW(t.initialize(nntrainer::Initializer::HE_NORMAL), + std::invalid_argument); +} + TEST(nntrainer_Tensor, split_01_p) { { nntrainer::TensorDim ref_dim(3, 2, 4, 5);