[Tensor] Optimize dequantize operation
authorDonghyeon Jeong <dhyeon.jeong@samsung.com>
Thu, 12 Oct 2023 02:05:05 +0000 (11:05 +0900)
committerJijoong Moon <jijoong.moon@samsung.com>
Fri, 20 Oct 2023 07:09:19 +0000 (16:09 +0900)
- Perform dequantization by utilizing tensor operations instead of manual calculation.
- Tensor now contains two types of scale factors (FP32, FP16).
- Add a flate function that copies tensor values across different data types. This function is temporary and should later be replaced by scopy to speed it up.

**Self-evaluation:**
1. Build test:   [X]Passed [ ]Failed [ ]Skipped
2. Run test:     [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <dhyeon.jeong@samsung.com>
nntrainer/layers/fc_layer.cpp
nntrainer/tensor/tensor.cpp
nntrainer/tensor/tensor.h
test/unittest/unittest_nntrainer_tensor.cpp
test/unittest/unittest_nntrainer_tensor_fp16.cpp
test/unittest/unittest_nntrainer_tensor_nhwc.cpp

index 57bc0c1e71485a64385efe26bbc399d63b522963..b79773fae2d3e05c8cd8648124c6f62c92957d45 100644 (file)
@@ -132,14 +132,7 @@ void FullyConnectedLayer::forwarding(RunLayerContext &context, bool training) {
     unsigned int axis =
       context.getWeightObject(weight_idx[FCParams::weight]).getOutputAxis();
 
-    if (dtype == nntrainer::Tdatatype::FP32)
-      weight.dequantize<float>(weight_, axis);
-    else if (dtype == nntrainer::Tdatatype::FP16)
-#ifdef ENABLE_FP16
-      weight.dequantize<_FP16>(weight_, axis);
-#else
-      ml_loge("%s", "Error: enable-fp16 is not enabled");
-#endif
+    weight.dequantize(weight_, axis);
     input_.dot(weight_, hidden_, false, false);
   } else {
     input_.dot(weight, hidden_, false, false);
index 03a82e566d5386b2f311a0bad9a41f6c10678f45..574e47af555ed0e6301f9521fbb40f2aa5114ab4 100644 (file)
@@ -3160,12 +3160,12 @@ void Tensor::read(std::ifstream &file, Tdatatype s_type) {
       if (s_type == Tdatatype::FP32) {
         float scale;
         file.read((char *)&scale, sizeof(float));
-        scale_factors.push_back(scale);
+        scale_factors_32.push_back(scale);
       } else if (s_type == Tdatatype::FP16) {
 #ifdef ENABLE_FP16
         _FP16 scale;
         file.read((char *)&scale, sizeof(_FP16));
-        scale_factors.push_back((float)scale);
+        scale_factors_16.push_back(scale);
 #else
         throw std::invalid_argument("Error: enable-fp16 is not enabled");
 #endif
@@ -3650,10 +3650,20 @@ void Tensor::setScaleFactors(std::vector<float> scales) {
     throw std::invalid_argument("Error: invalid parameter");
   }
 
-  scale_factors = scales;
+  scale_factors_32 = scales;
 }
 
-std::vector<float> Tensor::getScaleFactors() const { return scale_factors; }
+#ifdef ENABLE_FP16
+void Tensor::setScaleFactors16(std::vector<_FP16> scales) {
+  if (scales.empty()) {
+    throw std::invalid_argument("Error: invalid parameter");
+  }
+
+  scale_factors_16 = scales;
+}
+#endif
+
+std::vector<float> Tensor::getScaleFactors() const { return scale_factors_32; }
 
 void Tensor::setZeroPoints(std::vector<uint8_t> zp) {
   if (zp.empty()) {
@@ -3665,6 +3675,126 @@ void Tensor::setZeroPoints(std::vector<uint8_t> zp) {
 
 std::vector<uint8_t> Tensor::getZeroPoints() const { return zero_points; }
 
+void Tensor::flate(Tensor &output) const {
+  if (output.getDataType() == Tdatatype::FP32) {
+    float *o_data = output.getData<float>();
+    const uint8_t *data = getData<uint8_t>();
+
+    if (getDataType() == Tdatatype::QINT4) {
+      for (unsigned int i = 0; i < (output.getDim().getDataLen() + 1) / 2;
+           ++i) {
+        unsigned int idx = i * 2;
+        o_data[idx] = data[i] >> 4;
+        if (idx + 1 < output.getDim().getDataLen())
+          o_data[idx + 1] = data[i] & 0x0f;
+      }
+    } else if (getDataType() == Tdatatype::QINT8) {
+      for (unsigned int i = 0; i < output.getDim().getDataLen(); ++i) {
+        o_data[i] = data[i];
+      }
+    }
+  } else if (output.getDataType() == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
+    _FP16 *o_data = output.getData<_FP16>();
+    const uint8_t *data = getData<uint8_t>();
+
+    if (getDataType() == Tdatatype::QINT8) {
+      for (unsigned int i = 0; i < output.getDim().getDataLen(); ++i) {
+        o_data[i] = data[i];
+      }
+    }
+#else
+    throw std::invalid_argument("enble-fp16 is not set");
+#endif
+  }
+}
+
+void Tensor::dequantize(Tensor &output, unsigned int axis) const {
+  if (getDataType() == Tdatatype::FP32 || getDataType() == Tdatatype::FP16) {
+    throw std::invalid_argument("Error: Tensor cannot be dequantized");
+  }
+
+  if (output.getDataType() == Tdatatype::QINT8 ||
+      output.getDataType() == Tdatatype::QINT4) {
+    throw std::invalid_argument("Error: Target datatype is quantized type");
+  }
+
+  if (getFormat() != output.getFormat())
+    throw std::invalid_argument("Error: TensorType do not match");
+
+  if (batch() != output.batch() || channel() != output.channel() ||
+      width() != output.width() || height() != output.height())
+    throw std::invalid_argument("Error: TensorDim do not match");
+
+  if (output.getDataType() == Tdatatype::FP32 && scale_factors_32.empty()) {
+    throw std::invalid_argument("Error: No scale factors");
+  }
+#ifdef ENABLE_FP16
+  if (output.getDataType() == Tdatatype::FP16 && scale_factors_16.empty()) {
+    throw std::invalid_argument("Error: No scale factors");
+  }
+#endif
+  if (axis == 0 && zero_points.size() != batch()) {
+    throw std::invalid_argument("Error: output axis do not match ");
+  }
+
+  if (axis == 1 && zero_points.size() != channel()) {
+    throw std::invalid_argument("Error: output axis do not match ");
+  }
+
+  if (axis == 2 && zero_points.size() != height()) {
+    throw std::invalid_argument("Error: output axis do not match ");
+  }
+
+  if (axis == 3 && zero_points.size() != width()) {
+    throw std::invalid_argument("Error: output axis do not match ");
+  }
+
+  size_t b = (axis == 0) ? zero_points.size() : 1;
+  size_t c = (axis == 1) ? zero_points.size() : 1;
+  size_t h = (axis == 2) ? zero_points.size() : 1;
+  size_t w = (axis == 3) ? zero_points.size() : 1;
+
+  if (output.getDataType() == Tdatatype::FP16) {
+#ifdef ENABLE_FP16
+    if (getDataType() == Tdatatype::QINT4) {
+      scopy((size() + 1) / 2, getData<uint8_t>(), 1, output.getData<_FP16>(),
+            1);
+    } else if (getDataType() == Tdatatype::QINT8) {
+      // @todo scopy for qint8
+      flate(output);
+    }
+
+    std::vector<_FP16> zero_points_16(zero_points.begin(), zero_points.end());
+    Tensor zero_points_fp16({{b, c, h, w}, {getFormat(), Tdatatype::FP16}},
+                            zero_points_16.data());
+
+    Tensor scale_factors_fp16({{b, c, h, w}, {getFormat(), Tdatatype::FP16}},
+                              scale_factors_16.data());
+
+    output.subtract_i(zero_points_fp16);
+    output.multiply_i(scale_factors_fp16);
+
+#else
+    throw std::invalid_argument("enble-fp16 is not set");
+#endif
+  } else if (output.getDataType() == Tdatatype::FP32) {
+    // @todo need scopy for uint8 to float
+    flate(output);
+
+    std::vector<float> zero_points_32(zero_points.begin(), zero_points.end());
+    Tensor zero_points_fp32({{b, c, h, w}, {getFormat(), Tdatatype::FP32}},
+                            zero_points_32.data());
+    Tensor scale_factors_fp32({{b, c, h, w}, {getFormat(), Tdatatype::FP32}},
+                              scale_factors_32.data());
+
+    output.subtract_i(zero_points_fp32);
+    output.multiply_i(scale_factors_fp32);
+  }
+
+  return;
+}
+
 // namespace nntrainer
 
 } /* namespace nntrainer */
index 1cb607d7ebcfb337347150f892ccd894e0e6e78d..fb29c237c6f057e6c3648779a0cda0cc8a397c85 100644 (file)
@@ -1971,6 +1971,14 @@ public:
    */
   void setZeroPoints(std::vector<uint8_t> zp);
 
+#ifdef ENABLE_FP16
+  /**
+   * @brief     Set fp16 scale factors of the tensor
+   * @param[in] scales fp16 scale factors
+   */
+  void setScaleFactors16(std::vector<_FP16> scales);
+#endif
+
   /**
    * @brief Get zero points of the tensor
    *
@@ -1979,102 +1987,16 @@ public:
   std::vector<uint8_t> getZeroPoints() const;
 
   /**
-   * @brief     Dequantize Tensor
-   * @retval    Dequantized Tensor
+   * @brief      Dequantize Tensor to output tensor datatype
+   * @param[out] output Tensor to store the result
    */
-  template <typename T = float> Tensor dequantize(unsigned int axis) const {
-    Tdatatype dtype =
-      (typeid(T) == typeid(float)) ? Tdatatype::FP32 : Tdatatype::FP16;
-
-    Tensor t =
-      Tensor(batch(), channel(), height(), width(), getFormat(), dtype);
-
-    return dequantize<T>(t, axis);
-  }
+  void dequantize(Tensor &output, unsigned int axis) const;
 
   /**
-   * @brief      Dequantize Tensor to output tensor datatype
+   * @brief      copy QINT Tensor and save to output tensor
    * @param[out] output Tensor to store the result
-   * @retval     Dequantized Tensor
    */
-  template <typename T>
-  Tensor &dequantize(Tensor &output, unsigned int axis) const {
-    if (getDataType() == Tdatatype::FP32 || getDataType() == Tdatatype::FP16) {
-      throw std::invalid_argument("Error: Tensor cannot be dequantized");
-    }
-
-    if (output.getDataType() == Tdatatype::QINT8 ||
-        output.getDataType() == Tdatatype::QINT4) {
-      throw std::invalid_argument("Error: Target datatype is quantized type");
-    }
-
-    if (getFormat() != output.getFormat())
-      throw std::invalid_argument("Error: TensorType do not match");
-
-    if (batch() != output.batch() || channel() != output.channel() ||
-        width() != output.width() || height() != output.height())
-      throw std::invalid_argument("Error: TensorDim do not match");
-
-    if (scale_factors.empty()) {
-      throw std::invalid_argument("Error: No scale factors");
-    }
-
-    if (zero_points.empty()) {
-      throw std::invalid_argument("Error: No zero points");
-    }
-
-    if (axis == 0 && scale_factors.size() != batch() &&
-        zero_points.size() != batch()) {
-      throw std::invalid_argument("Error: output axis do not match ");
-    }
-
-    if (axis == 1 && scale_factors.size() != channel() &&
-        zero_points.size() != channel()) {
-      throw std::invalid_argument("Error: output axis do not match ");
-    }
-
-    if (axis == 2 && scale_factors.size() != height() &&
-        zero_points.size() != height()) {
-      throw std::invalid_argument("Error: output axis do not match ");
-    }
-
-    if (axis == 3 && scale_factors.size() != width() &&
-        zero_points.size() != width()) {
-      throw std::invalid_argument("Error: output axis do not match ");
-    }
-
-    int idx = 0;
-    for (unsigned int b = 0; b < batch(); ++b) {
-      for (unsigned int c = 0; c < channel(); ++c) {
-        for (unsigned int h = 0; h < height(); ++h) {
-          for (unsigned int w = 0; w < width(); ++w) {
-            if (axis == 0)
-              idx = b;
-            else if (axis == 1)
-              idx = c;
-            else if (axis == 2)
-              idx = h;
-            else if (axis == 3)
-              idx = w;
-
-            if (getDataType() == Tdatatype::QINT8) {
-              output.setValue(
-                b, c, h, w,
-                (T)(getValue<uint8_t>(b, c, h, w) - zero_points[idx]) *
-                  scale_factors[idx]);
-            } else {
-              output.setValue(
-                b, c, h, w,
-                (T)(getValueQint4(b, c, h, w) - zero_points[idx]) *
-                  scale_factors[idx]);
-            }
-          }
-        }
-      }
-    }
-
-    return output;
-  }
+  void flate(Tensor &output) const;
 
   static constexpr float epsilon = 1e-5;
 
@@ -2087,7 +2009,10 @@ private:
   std::string name; /**< name of the tensor */
   std::shared_ptr<MemoryData> data;
   size_t offset;
-  std::vector<float> scale_factors;
+  std::vector<float> scale_factors_32;
+#ifdef ENABLE_FP16
+  std::vector<_FP16> scale_factors_16;
+#endif
   std::vector<uint8_t> zero_points;
 
   /**<
index e32c02d8bb65d573af53029bb9fb4d95f54ba0c2..8f03b39fac484e250c9d10dcc353487c57f1a807 100644 (file)
@@ -4359,7 +4359,7 @@ TEST(nntrainer_Tensor, dequantize_01_n) {
 
   nntrainer::Tensor output(batch, channel, height, width);
 
-  EXPECT_THROW({ input.dequantize<float>(output, 1); }, std::invalid_argument);
+  EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument);
 }
 
 /**
@@ -4380,7 +4380,7 @@ TEST(nntrainer_Tensor, dequantize_02_n) {
 
   nntrainer::Tensor output(batch, channel, height, width);
 
-  EXPECT_THROW({ input.dequantize<float>(output, 1); }, std::invalid_argument);
+  EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument);
 }
 
 /**
@@ -4399,7 +4399,7 @@ TEST(nntrainer_Tensor, dequantize_03_n) {
 
   nntrainer::Tensor output(batch, channel, height, width);
 
-  EXPECT_THROW({ input.dequantize<float>(output, 1); }, std::invalid_argument);
+  EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument);
 }
 
 /**
@@ -4422,8 +4422,8 @@ TEST(nntrainer_Tensor, dequantize_04_n) {
 
   input.setScaleFactors({2.0, 1.5, 1.0, 0.5});
   input.setZeroPoints({2, 3, 4, 5});
-  EXPECT_THROW({ input.dequantize<float>(output, 1); }, std::invalid_argument);
-  EXPECT_NO_THROW({ input.dequantize<float>(output, 2); });
+  EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument);
+  EXPECT_NO_THROW({ input.dequantize(output, 2); });
 }
 
 /**
@@ -4446,153 +4446,7 @@ TEST(nntrainer_Tensor, dequantize_05_n) {
     batch, channel, height, width,
     {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
 
-  EXPECT_THROW({ input.dequantize<float>(output, 1); }, std::invalid_argument);
-}
-
-/**
- * @brief dequantize tensor
- */
-TEST(nntrainer_Tensor, dequantize_06_p) {
-  size_t batch = 1;
-  size_t channel = 3;
-  size_t height = 4;
-  size_t width = 5;
-
-  nntrainer::Tensor input(
-    {batch,
-     channel,
-     height,
-     width,
-     {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}},
-    true, nntrainer::Tensor::Initializer::ZEROS);
-  nntrainer::Tensor output(batch, channel, height, width);
-
-  // Dequantize by channel
-  EXPECT_NO_THROW(input.setScaleFactors({2, -2, -4}));
-  EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1}));
-  EXPECT_NO_THROW({ input.dequantize<float>(output, 1); });
-
-  float answer_data_1[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-                           -2, -2, -2, -2, -2, -2, -2, -2, 2,  2,  2,  2,
-                           2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-                           2,  2,  2,  2,  4,  4,  4,  4,  4,  4,  4,  4,
-                           4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4};
-
-  nntrainer::Tensor answer1(ml::train::TensorDim(batch, channel, height, width,
-                                                 {nntrainer::Tformat::NCHW,
-                                                  nntrainer::Tdatatype::FP32}),
-                            answer_data_1);
-
-  EXPECT_EQ(output, answer1);
-
-  // Dequantize by height
-  EXPECT_NO_THROW(input.setScaleFactors({4.2, 2, -2, -4.8}));
-  EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1}));
-  EXPECT_NO_THROW({ input.dequantize<float>(output, 2); });
-
-  float answer_data_2[] = {
-    -4.2, -4.2, -4.2, -4.2, -4.2, -2,   -2,   -2,   -2,   -2,   2,    2,
-    2,    2,    2,    4.8,  4.8,  4.8,  4.8,  4.8,  -4.2, -4.2, -4.2, -4.2,
-    -4.2, -2,   -2,   -2,   -2,   -2,   2,    2,    2,    2,    2,    4.8,
-    4.8,  4.8,  4.8,  4.8,  -4.2, -4.2, -4.2, -4.2, -4.2, -2,   -2,   -2,
-    -2,   -2,   2,    2,    2,    2,    2,    4.8,  4.8,  4.8,  4.8,  4.8};
-  nntrainer::Tensor answer2(ml::train::TensorDim(batch, channel, height, width,
-                                                 {nntrainer::Tformat::NCHW,
-                                                  nntrainer::Tdatatype::FP32}),
-                            answer_data_2);
-
-  EXPECT_EQ(output, answer2);
-
-  // Dequantize by width
-  EXPECT_NO_THROW(input.setScaleFactors({4.2, 2, -2, -4, 8}));
-  EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1, 1}));
-  EXPECT_NO_THROW({ input.dequantize<float>(output, 3); });
-
-  float answer_data_3[] = {
-    -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8,
-    -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8,
-    -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8,
-    -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8};
-
-  nntrainer::Tensor answer3(ml::train::TensorDim(batch, channel, height, width,
-                                                 {nntrainer::Tformat::NCHW,
-                                                  nntrainer::Tdatatype::FP32}),
-                            answer_data_3);
-
-  EXPECT_EQ(output, answer3);
-}
-
-/**
- * @brief dequantize qint4 tensor
- */
-TEST(nntrainer_Tensor, dequantize_08_p) {
-  size_t batch = 1;
-  size_t channel = 3;
-  size_t height = 4;
-  size_t width = 5;
-
-  nntrainer::Tensor input(
-    {batch,
-     channel,
-     height,
-     width,
-     {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}},
-    true, nntrainer::Tensor::Initializer::ZEROS);
-  nntrainer::Tensor output(batch, channel, height, width);
-
-  // Dequantize by channel
-  EXPECT_NO_THROW(input.setScaleFactors({2, -2, -4}));
-  EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1}));
-  EXPECT_NO_THROW({ input.dequantize<float>(output, 1); });
-
-  float answer_data_1[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-                           -2, -2, -2, -2, -2, -2, -2, -2, 2,  2,  2,  2,
-                           2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-                           2,  2,  2,  2,  4,  4,  4,  4,  4,  4,  4,  4,
-                           4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4};
-
-  nntrainer::Tensor answer1(ml::train::TensorDim(batch, channel, height, width,
-                                                 {nntrainer::Tformat::NCHW,
-                                                  nntrainer::Tdatatype::FP32}),
-                            answer_data_1);
-
-  EXPECT_EQ(output, answer1);
-
-  // Dequantize by height
-  EXPECT_NO_THROW(input.setScaleFactors({4.2, 2, -2, -4}));
-  EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1}));
-  EXPECT_NO_THROW({ input.dequantize<float>(output, 2); });
-
-  float answer_data_2[] = {-4.2, -4.2, -4.2, -4.2, -4.2, -2, -2, -2, -2, -2,
-                           2,    2,    2,    2,    2,    4,  4,  4,  4,  4,
-                           -4.2, -4.2, -4.2, -4.2, -4.2, -2, -2, -2, -2, -2,
-                           2,    2,    2,    2,    2,    4,  4,  4,  4,  4,
-                           -4.2, -4.2, -4.2, -4.2, -4.2, -2, -2, -2, -2, -2,
-                           2,    2,    2,    2,    2,    4,  4,  4,  4,  4};
-  nntrainer::Tensor answer2(ml::train::TensorDim(batch, channel, height, width,
-                                                 {nntrainer::Tformat::NCHW,
-                                                  nntrainer::Tdatatype::FP32}),
-                            answer_data_2);
-
-  EXPECT_EQ(output, answer2);
-
-  // Dequantize by width
-  EXPECT_NO_THROW(input.setScaleFactors({4.2, 2, -2, -4, 8}));
-  EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1, 1}));
-  EXPECT_NO_THROW({ input.dequantize<float>(output, 3); });
-
-  float answer_data_3[] = {
-    -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8,
-    -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8,
-    -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8,
-    -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8};
-
-  nntrainer::Tensor answer3(ml::train::TensorDim(batch, channel, height, width,
-                                                 {nntrainer::Tformat::NCHW,
-                                                  nntrainer::Tdatatype::FP32}),
-                            answer_data_3);
-
-  EXPECT_EQ(output, answer3);
+  EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument);
 }
 
 int main(int argc, char **argv) {
index 76f2c97280118ae2a8cdce72beba76d66ec55a35..24f6cfcf0efe8cc812ca64ffc05787e8c304a546 100644 (file)
@@ -5801,14 +5801,16 @@ TEST(nntrainer_Tensor, dequantize_01_n) {
   nntrainer::Tensor input(batch, channel, height, width,
                           nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16);
   GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k);
-  input.setScaleFactors({1.5, 1.0, 0.5});
+
+  input.setScaleFactors16({static_cast<_FP16>(1.5), static_cast<_FP16>(1.0),
+                           static_cast<_FP16>(0.5)});
   input.setZeroPoints({1, 4, 7});
 
   nntrainer::Tensor output(batch, channel, height, width,
                            nntrainer::Tformat::NCHW,
                            nntrainer::Tdatatype::FP16);
 
-  EXPECT_THROW({ input.dequantize<_FP16>(output, 1); }, std::invalid_argument);
+  EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument);
 }
 
 /**
@@ -5825,14 +5827,15 @@ TEST(nntrainer_Tensor, dequantize_02_n) {
     {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
   GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k);
 
-  input.setScaleFactors({1.5, 1.0, 0.5});
+  input.setScaleFactors16({static_cast<_FP16>(1.5), static_cast<_FP16>(1.0),
+                           static_cast<_FP16>(0.5)});
   input.setZeroPoints({1, 4, 7});
 
   nntrainer::Tensor output(batch, channel, height, width,
                            nntrainer::Tformat::NCHW,
                            nntrainer::Tdatatype::FP16);
 
-  EXPECT_THROW({ input.dequantize<_FP16>(output, 1); }, std::invalid_argument);
+  EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument);
 }
 
 /**
@@ -5853,7 +5856,7 @@ TEST(nntrainer_Tensor, dequantize_03_n) {
                            nntrainer::Tformat::NCHW,
                            nntrainer::Tdatatype::FP16);
 
-  EXPECT_THROW({ input.dequantize<_FP16>(output, 1); }, std::invalid_argument);
+  EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument);
 }
 
 /**
@@ -5869,12 +5872,15 @@ TEST(nntrainer_Tensor, dequantize_04_p) {
     batch, channel, height, width,
     {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
   GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1);
-  input.setScaleFactors({1.5, 1.0, 0.5});
+
+  input.setScaleFactors16({static_cast<_FP16>(1.5), static_cast<_FP16>(1.0),
+                           static_cast<_FP16>(0.5)});
   input.setZeroPoints({0, 0, 0});
 
-  nntrainer::Tensor output;
+  nntrainer::Tensor output(
+    {1, 3, 4, 5, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16}}, true);
 
-  EXPECT_NO_THROW({ output = input.dequantize<_FP16>(1); });
+  EXPECT_NO_THROW({ input.dequantize(output, 1); });
 
   _FP16 answer_data[] = {
     static_cast<_FP16>(1.5), static_cast<_FP16>(1.5), static_cast<_FP16>(1.5),
@@ -5927,9 +5933,10 @@ TEST(nntrainer_Tensor, dequantize_05_p) {
                            nntrainer::Tdatatype::FP16);
 
   // Dequantize by channel
-  EXPECT_NO_THROW(input.setScaleFactors({2, -2, -4}));
+  EXPECT_NO_THROW(input.setScaleFactors16(
+    {static_cast<_FP16>(2), static_cast<_FP16>(-2), static_cast<_FP16>(-4)}));
   EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1}));
-  EXPECT_NO_THROW({ input.dequantize<_FP16>(output, 1); });
+  EXPECT_NO_THROW({ input.dequantize(output, 1); });
 
   _FP16 answer_data_1[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
                            -2, -2, -2, -2, -2, -2, -2, -2, 2,  2,  2,  2,
@@ -5945,9 +5952,12 @@ TEST(nntrainer_Tensor, dequantize_05_p) {
   EXPECT_EQ(output, answer1);
 
   // Dequantize by height
-  EXPECT_NO_THROW(input.setScaleFactors({4.2, 2, -2, -4.8}));
+
+  EXPECT_NO_THROW(input.setScaleFactors16(
+    {static_cast<_FP16>(4.2), static_cast<_FP16>(2), static_cast<_FP16>(-2),
+     static_cast<_FP16>(-4.8)}));
   EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1}));
-  EXPECT_NO_THROW({ input.dequantize<_FP16>(output, 2); });
+  EXPECT_NO_THROW({ input.dequantize(output, 2); });
 
   _FP16 answer_data_2[] = {static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
                            static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
@@ -5987,9 +5997,11 @@ TEST(nntrainer_Tensor, dequantize_05_p) {
   EXPECT_EQ(output, answer2);
 
   // Dequantize by width
-  EXPECT_NO_THROW(input.setScaleFactors({4.2, 2, -2, -4, 8}));
+  EXPECT_NO_THROW(input.setScaleFactors16(
+    {static_cast<_FP16>(4.2), static_cast<_FP16>(2), static_cast<_FP16>(-2),
+     static_cast<_FP16>(-4), static_cast<_FP16>(8)}));
   EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1, 1}));
-  EXPECT_NO_THROW({ input.dequantize<_FP16>(output, 3); });
+  EXPECT_NO_THROW({ input.dequantize(output, 3); });
 
   _FP16 answer_data_3[] = {static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
                            static_cast<_FP16>(2),    static_cast<_FP16>(4),
@@ -6051,9 +6063,10 @@ TEST(nntrainer_Tensor, dequantize_06_p) {
                            nntrainer::Tdatatype::FP16);
 
   // Dequantize by channel
-  EXPECT_NO_THROW(input.setScaleFactors({2, -2, -4}));
+  EXPECT_NO_THROW(input.setScaleFactors16(
+    {static_cast<_FP16>(2), static_cast<_FP16>(-2), static_cast<_FP16>(-4)}));
   EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1}));
-  EXPECT_NO_THROW({ input.dequantize<_FP16>(output, 1); });
+  EXPECT_NO_THROW({ input.dequantize(output, 1); });
 
   _FP16 answer_data_1[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
                            -2, -2, -2, -2, -2, -2, -2, -2, 2,  2,  2,  2,
@@ -6069,9 +6082,11 @@ TEST(nntrainer_Tensor, dequantize_06_p) {
   EXPECT_EQ(output, answer1);
 
   // Dequantize by height
-  EXPECT_NO_THROW(input.setScaleFactors({4.2, 2, -2, -4}));
+  EXPECT_NO_THROW(
+    input.setScaleFactors16({static_cast<_FP16>(4.2), static_cast<_FP16>(2),
+                             static_cast<_FP16>(-2), static_cast<_FP16>(-4)}));
   EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1}));
-  EXPECT_NO_THROW({ input.dequantize<_FP16>(output, 2); });
+  EXPECT_NO_THROW({ input.dequantize(output, 2); });
 
   _FP16 answer_data_2[] = {static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
                            static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
@@ -6111,9 +6126,11 @@ TEST(nntrainer_Tensor, dequantize_06_p) {
   EXPECT_EQ(output, answer2);
 
   // Dequantize by width
-  EXPECT_NO_THROW(input.setScaleFactors({4.2, 2, -2, -4, 8}));
+  EXPECT_NO_THROW(input.setScaleFactors16(
+    {static_cast<_FP16>(4.2), static_cast<_FP16>(2), static_cast<_FP16>(-2),
+     static_cast<_FP16>(-4), static_cast<_FP16>(8)}));
   EXPECT_NO_THROW(input.setZeroPoints({1, 1, 1, 1, 1}));
-  EXPECT_NO_THROW({ input.dequantize<_FP16>(output, 3); });
+  EXPECT_NO_THROW({ input.dequantize(output, 3); });
 
   _FP16 answer_data_3[] = {static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
                            static_cast<_FP16>(2),    static_cast<_FP16>(4),
index 7bd430e8cc8de1febbcc03b08bd3be4a42b1fda4..9a0e4ffa2d428e2b16b5d6faa2840d337c41de75 100644 (file)
@@ -4695,7 +4695,7 @@ TEST(nntrainer_Tensor, dequantize_01_n) {
     batch, channel, height, width,
     {nntrainer::Tformat::NHWC, nntrainer::Tdatatype::FP32});
 
-  EXPECT_THROW({ input.dequantize<float>(output, 1); }, std::invalid_argument);
+  EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument);
 }
 
 /**
@@ -4718,115 +4718,7 @@ TEST(nntrainer_Tensor, dequantize_02_n) {
     batch, channel, height, width,
     {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP32});
 
-  EXPECT_THROW({ input.dequantize<float>(output, 1); }, std::invalid_argument);
-}
-
-/**
- * @brief dequantize nhwc tensor
- */
-TEST(nntrainer_Tensor, dequantize_03_p) {
-  int batch = 1;
-  int channel = 3;
-  int height = 4;
-  int width = 5;
-
-  nntrainer::Tensor input(
-    batch, channel, height, width,
-    {nntrainer::Tformat::NHWC, nntrainer::Tdatatype::QINT8});
-  GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1);
-  input.setScaleFactors({1.5, 1.0, 0.5});
-  input.setZeroPoints({5, 5, 5});
-
-  nntrainer::Tensor output;
-  output.getDim().setFormat(nntrainer::Tformat::NHWC);
-
-  EXPECT_NO_THROW({ output = input.dequantize<float>(1); });
-
-  float answer_data[] = {
-    -6,   1, 3,   -6,   1, 3,   -6,   1, 3,   -6,   1, 3,   -6,   1, 3,
-    -4.5, 2, 3.5, -4.5, 2, 3.5, -4.5, 2, 3.5, -4.5, 2, 3.5, -4.5, 2, 3.5,
-    -3,   3, 4,   -3,   3, 4,   -3,   3, 4,   -3,   3, 4,   -3,   3, 4,
-    -1.5, 4, 4.5, -1.5, 4, 4.5, -1.5, 4, 4.5, -1.5, 4, 4.5, -1.5, 4, 4.5};
-
-  nntrainer::Tensor answer(ml::train::TensorDim(batch, channel, height, width,
-                                                {nntrainer::Tformat::NHWC,
-                                                 nntrainer::Tdatatype::FP32}),
-                           answer_data);
-
-  EXPECT_EQ(output, answer);
-}
-
-/**
- * @brief dequantize nhwc tensor
- */
-TEST(nntrainer_Tensor, dequantize_04_p) {
-  int batch = 1;
-  int channel = 3;
-  int height = 4;
-  int width = 5;
-
-  nntrainer::Tensor input(
-    batch, channel, height, width,
-    {nntrainer::Tformat::NHWC, nntrainer::Tdatatype::QINT8});
-  GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1);
-  input.setScaleFactors({2.5, 2.0, 1.5, 1.0});
-  input.setZeroPoints({8, 8, 8, 8});
-
-  nntrainer::Tensor output(
-    batch, channel, height, width,
-    {nntrainer::Tformat::NHWC, nntrainer::Tdatatype::FP32});
-
-  EXPECT_NO_THROW({ input.dequantize<float>(output, 2); });
-
-  float answer_data[] = {
-    -17.5, -5, 7.5, -17.5, -5, 7.5, -17.5, -5, 7.5, -17.5, -5, 7.5,
-    -17.5, -5, 7.5, -12,   -2, 8,   -12,   -2, 8,   -12,   -2, 8,
-    -12,   -2, 8,   -12,   -2, 8,   -7.5,  0,  7.5, -7.5,  0,  7.5,
-    -7.5,  0,  7.5, -7.5,  0,  7.5, -7.5,  0,  7.5, -4,    1,  6,
-    -4,    1,  6,   -4,    1,  6,   -4,    1,  6,   -4,    1,  6,
-  };
-
-  nntrainer::Tensor answer(ml::train::TensorDim(batch, channel, height, width,
-                                                {nntrainer::Tformat::NHWC,
-                                                 nntrainer::Tdatatype::FP32}),
-                           answer_data);
-
-  EXPECT_EQ(output, answer);
-}
-
-/**
- * @brief dequantize nhwc qint4 tensor
- */
-TEST(nntrainer_Tensor, dequantize_05_p) {
-  size_t batch = 1;
-  size_t channel = 10;
-  size_t height = 2;
-  size_t width = 1;
-
-  nntrainer::Tensor input(
-    {batch,
-     channel,
-     height,
-     width,
-     {nntrainer::Tformat::NHWC, nntrainer::Tdatatype::QINT4}},
-    true, nntrainer::Tensor::Initializer::ZEROS);
-
-  input.setScaleFactors({8, 6, 4, 2, 1, -1, -2, -4, -6, -7});
-  input.setZeroPoints({1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
-
-  nntrainer::Tensor output;
-
-  EXPECT_NO_THROW({ output = input.dequantize<float>(1); });
-
-  float answer_data[] = {-8, -6, -4, -2, -1, 1, 2, 4, 6, 7,
-                         -8, -6, -4, -2, -1, 1, 2, 4, 6, 7};
-
-  nntrainer::Tensor answer(ml::train::TensorDim(batch, channel, height, width,
-                                                {nntrainer::Tformat::NHWC,
-                                                 nntrainer::Tdatatype::FP32}),
-                           answer_data);
-
-  EXPECT_EQ(output, answer);
+  EXPECT_THROW({ input.dequantize(output, 1); }, std::invalid_argument);
 }
 
 int main(int argc, char **argv) {