- Quantized Tensor now supports Int4 data with scale factors.
- Two Int4 values are packed into a single Int8, each occupying 4 bits.
- Dequantization multiplies each element by the scale factor selected along the chosen axis index (b, c, h, or w).
- Only read (getValueQint4), write (setValue), and dequantization operations are supported.
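
A minimal standalone sketch of the packing and scale-based dequantization scheme described above (illustrative only; `pack_qint4`, `unpack_qint4`, and the driver loop are hypothetical helpers mirroring what `encode_qint`/`decode_qint` and `dequantize` do in this patch, not part of the nntrainer API):

```cpp
// Standalone illustration of the int4 packing / scale scheme; not nntrainer code.
#include <cstdint>
#include <cstdio>
#include <vector>

// Pack two signed 4-bit values (range -8..7) into one byte:
// 'high' occupies the upper nibble, 'low' the lower nibble.
int8_t pack_qint4(int8_t high, int8_t low) {
  return static_cast<int8_t>(((high & 0x0f) << 4) | (low & 0x0f));
}

// Extract one signed 4-bit value from a packed byte and sign-extend it.
int8_t unpack_qint4(int8_t packed, bool is_high) {
  int v = is_high ? ((packed >> 4) & 0x0f) : (packed & 0x0f);
  return static_cast<int8_t>(v >= 8 ? v - 16 : v);
}

int main() {
  // Four int4 elements occupy (4 + 1) / 2 = 2 bytes of storage.
  std::vector<int8_t> values = {-8, 7, 3, -2};
  std::vector<int8_t> packed = {pack_qint4(values[0], values[1]),
                                pack_qint4(values[2], values[3])};

  // One scale factor per group of elements; in the Tensor code the factor is
  // chosen by the index of the configured axis (b, c, h, or w).
  std::vector<float> scales = {0.5f, 1.5f};

  for (size_t i = 0; i < values.size(); ++i) {
    int8_t q = unpack_qint4(packed[i / 2], i % 2 == 0);
    float dequantized = q * scales[i / 2];
    std::printf("q=%d dequantized=%.2f\n", q, dequantized);
  }
  return 0;
}
```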
**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped
Signed-off-by: Donghyeon Jeong <dhyeon.jeong@samsung.com>
enum class Format { NCHW, NHWC };
/**
- * @brief Tensor Data Type. Currently QINT8, FP16 & FP32 Support
+ * @brief Tensor Data Type. Currently QINT4, QINT8, FP16 & FP32 Support
*
*/
enum class DataType {
+ QINT4, /** quantized int 4*/
QINT8, /** quantized int 8*/
FP16, /** half precision */
FP32 /** single precision */
* @brief Creator of TensorDim with Format & DataType
*
 * @param fm format NCHW | NHWC
- * @param fm DataType QINT8 | FP16 | FP32
+ * @param fm DataType QINT4 | QINT8 | FP16 | FP32
* @param eff_dim_flag_ effective dimension flag (1 means it's effective)
* @param dyn_dim_flag_ dynamic dimension flag (1 means it's unspecified)
*/
* @param h height
* @param w width
 * @param fm format NCHW | NHWC
- * @param d_type Data Type QINT8 | FP16 | FP32
+ * @param d_type Data Type QINT4 | QINT8 | FP16 | FP32
* @param eff_dim_flag_ dimension bit flag to calculate the dynamic
* dimension, rightmost is width
*/
*
* @param shape shape of format
 * @param fm format NCHW | NHWC
- * @param d_type data type QINT8 | FP16 | FP32
+ * @param d_type data type QINT4 | QINT8 | FP16 | FP32
*/
TensorDim(const std::string &shape, TensorDim::Format fm,
TensorDim::DataType d_type = TensorDim::DataType::FP32);
delete[] mem_data->template getAddr<int8_t>();
delete mem_data;
});
+ } else if (getDataType() == ml::train::TensorDim::DataType::QINT4) {
+ mem_data =
+ new MemoryData((void *)(new int8_t[(dim.getDataLen() + 1) / 2]{}));
+ data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
+ delete[] mem_data->template getAddr<int8_t>();
+ delete mem_data;
+ });
}
offset = 0;
initialize();
_data[i] != _rdata[i])
return false;
}
+ } else if (dim.getDataType() == ml::train::TensorDim::DataType::QINT4) {
+ const int8_t *_data = getData<int8_t>();
+ const int8_t *_rdata = rhs.getData<int8_t>();
+ int8_t data, rdata;
+ for (size_t i = 0; i < len; ++i) {
+        data = decode_qint(_data[i / 2], (i % 2 == 0));
+        rdata = decode_qint(_rdata[i / 2], (i % 2 == 0));
+
+        /** int4 values are integers, so a plain comparison suffices */
+        if (data != rdata)
+          return false;
+ }
}
return true;
#endif
} else if (this->getDataType() == ml::train::TensorDim::DataType::QINT8) {
throw std::invalid_argument("Error: RandNormal is invalid for QINT8");
+ } else if (this->getDataType() == ml::train::TensorDim::DataType::QINT4) {
+ throw std::invalid_argument("Error: RandNormal is invalid for QINT4");
}
}
#endif
} else if (this->getDataType() == ml::train::TensorDim::DataType::QINT8) {
throw std::invalid_argument("Error: RandUniform is invalid for QINT8");
+ } else if (this->getDataType() == ml::train::TensorDim::DataType::QINT4) {
+ throw std::invalid_argument("Error: RandUniform is invalid for QINT4");
}
}
#endif
} else if (this->getDataType() == ml::train::TensorDim::DataType::QINT8) {
throw std::invalid_argument("Error: setRandBernoulli is invalid for QINT8");
+ } else if (this->getDataType() == ml::train::TensorDim::DataType::QINT4) {
+ throw std::invalid_argument("Error: setRandBernoulli is invalid for QINT4");
}
}
}
out.copyfmt(init);
}
+ } else if (getDataType() == ml::train::TensorDim::DataType::QINT4) {
+ const int8_t *data = getData<int8_t>();
+ unsigned int len = size();
+ out << "data addr: " << (float *)data << '\n';
+ out << dim;
+
+    if (len > 100) {
+      /** len counts elements, not bytes; decode through getValueQint4() to
+       * avoid reading past the packed buffer of (len + 1) / 2 bytes */
+      out << '[' << (int)getValueQint4(0) << ' ' << (int)getValueQint4(1)
+          << ' ' << (int)getValueQint4(2) << " ... "
+          << (int)getValueQint4(len - 3) << ' ' << (int)getValueQint4(len - 2)
+          << ' ' << (int)getValueQint4(len - 1) << ']' << std::endl;
+      return;
+    }
+
+ std::ios init(NULL);
+ init.copyfmt(out);
+ if (getFormat() == Tformat::NCHW) {
+ for (unsigned int k = 0; k < batch(); k++) {
+ for (unsigned int l = 0; l < channel(); l++) {
+ for (unsigned int i = 0; i < height(); i++) {
+ for (unsigned int j = 0; j < width(); j++) {
+ out << std::setw(10) << (int)this->getValueQint4(k, l, i, j)
+ << " ";
+ }
+ out << std::endl;
+ }
+ out << std::endl;
+ }
+ out << "-------" << std::endl;
+ }
+ } else {
+ for (unsigned int k = 0; k < batch(); k++) {
+ for (unsigned int i = 0; i < height(); i++) {
+ for (unsigned int j = 0; j < width(); j++) {
+ for (unsigned int l = 0; l < channel(); l++) {
+ out << std::setw(10) << (int)this->getValueQint4(k, l, i, j)
+ << " ";
+ }
+ out << std::endl;
+ }
+ out << std::endl;
+ }
+ out << "-------" << std::endl;
+      }
+    }
+    out.copyfmt(init);
}
}
if (buf == getData<int8_t>()) {
return;
}
+ } else if (getDataType() == ml::train::TensorDim::DataType::QINT4) {
+ if (buf == getData<int8_t>()) {
+ return;
+ }
}
if (getDataType() == ml::train::TensorDim::DataType::FP32) {
for (unsigned int i = 0; i < size(); ++i) {
getData<int8_t>()[i] = ((int8_t *)buf)[i];
}
+ } else if (getDataType() == ml::train::TensorDim::DataType::QINT4) {
+ for (unsigned int i = 0; i < (size() + 1) / 2; ++i) {
+ getData<int8_t>()[i] = ((int8_t *)buf)[i];
+ }
}
}
} else if (getDataType() == ml::train::TensorDim::DataType::QINT8) {
int8_t *data = getData<int8_t>();
std::fill(data, data + size(), val);
+ } else if (getDataType() == ml::train::TensorDim::DataType::QINT4) {
+ int8_t *data = getData<int8_t>();
+ int8_t mixed = encode_qint(val, val);
+ std::fill(data, data + (size() + 1) / 2, mixed);
}
}
#endif
} else if (dim.getDataType() == ml::train::TensorDim::DataType::QINT8) {
apply_i<int8_t>([](int8_t val) -> int8_t { return 0; });
+ } else if (dim.getDataType() == ml::train::TensorDim::DataType::QINT4) {
+ apply_i<int8_t>([](int8_t val) -> int8_t { return 0; });
}
}
return output;
}
-void Tensor::setScaleFactors(std::vector<float> scales) {
- if (!scale_factors.empty()) {
- throw std::invalid_argument("Error: scale factors already been set");
+int8_t Tensor::encode_qint(int8_t high, int8_t low) const {
+ return (high << 4) | (low & 0x0f);
+}
+
+int8_t Tensor::decode_qint(int8_t val, bool isHigh) const {
+ if (isHigh) {
+ val = val >> 4;
+ } else {
+ val = val << 4;
+ val = val >> 4;
+ }
+
+ return val;
+}
+
+void Tensor::setScaleFactors(std::vector<float> scales, int idx) {
+ if (scales.empty() || idx < 0 || idx > 3) {
+ throw std::invalid_argument("Error: invalid parameter");
}
- if (scales.size() != channel()) {
+ if (idx == 0 && scales.size() != batch()) {
+ throw std::invalid_argument("Error: scale_factors.size() != batch() ");
+ }
+
+ if (idx == 1 && scales.size() != channel()) {
throw std::invalid_argument("Error: scale_factors.size() != channel() ");
}
+ if (idx == 2 && scales.size() != height()) {
+ throw std::invalid_argument("Error: scale_factors.size() != height() ");
+ }
+
+ if (idx == 3 && scales.size() != width()) {
+ throw std::invalid_argument("Error: scale_factors.size() != width() ");
+ }
+
scale_factors = scales;
+ scale_idx = idx;
}
std::vector<float> Tensor::getScaleFactors() { return scale_factors; }
throw std::invalid_argument("Error: Tensor cannot be dequantized");
}
- if (output.getDataType() == Tdatatype::QINT8) {
+ if (output.getDataType() == Tdatatype::QINT8 ||
+ output.getDataType() == Tdatatype::QINT4) {
throw std::invalid_argument("Error: Target datatype is quantized type");
}
throw std::invalid_argument("Error: No scale factors");
}
- for (unsigned int c = 0; c < channel(); ++c) {
- for (unsigned int b = 0; b < batch(); ++b) {
+  int idx = 0;
+ for (unsigned int b = 0; b < batch(); ++b) {
+ for (unsigned int c = 0; c < channel(); ++c) {
for (unsigned int h = 0; h < height(); ++h) {
for (unsigned int w = 0; w < width(); ++w) {
+ if (scale_idx == 0)
+ idx = b;
+ else if (scale_idx == 1)
+ idx = c;
+ else if (scale_idx == 2)
+ idx = h;
+ else if (scale_idx == 3)
+ idx = w;
+
if (output.getDataType() == Tdatatype::FP32) {
- output.setValue(b, c, h, w,
- (float)getValue<int8_t>(b, c, h, w) *
- scale_factors[c]);
+ if (getDataType() == Tdatatype::QINT8) {
+ output.setValue(b, c, h, w,
+ (float)getValue<int8_t>(b, c, h, w) *
+ scale_factors[idx]);
+ } else {
+ output.setValue(b, c, h, w,
+ (float)getValueQint4(b, c, h, w) *
+ scale_factors[idx]);
+ }
} else if (output.getDataType() == Tdatatype::FP16) {
#ifdef ENABLE_FP16
- output.setValue(b, c, h, w,
- (_FP16)getValue<int8_t>(b, c, h, w) *
- (_FP16)scale_factors[c]);
+ if (getDataType() == Tdatatype::QINT8) {
+ output.setValue(b, c, h, w,
+ (_FP16)getValue<int8_t>(b, c, h, w) *
+ (_FP16)scale_factors[idx]);
+ } else {
+ output.setValue(b, c, h, w,
+ (_FP16)getValueQint4(b, c, h, w) *
+ (_FP16)scale_factors[idx]);
+ }
#else
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
"[Tensor] trying to initialize Tensor from empty vector");
}
- if (t_type.data_type != Tdatatype::QINT8) {
+ if (t_type.data_type != Tdatatype::QINT8 &&
+ t_type.data_type != Tdatatype::QINT4) {
throw std::out_of_range(
"[Tensor] TensorType do not match with input data type");
}
strides = dim.computeStrides();
MemoryData *mem_data =
- new MemoryData((void *)(new int8_t[dim.getDataLen()]()));
+ (t_type.data_type == Tdatatype::QINT8)
+ ? new MemoryData((void *)(new int8_t[dim.getDataLen()]()))
+ : new MemoryData((void *)(new int8_t[(dim.getDataLen() + 1) / 2]()));
data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
delete[] mem_data->getAddr<int8_t>();
});
*/
template <typename T = float>
const T &getValue(unsigned int idx) const noexcept {
+ if (getDataType() == Tdatatype::QINT4) {
+ return getData<T>()[idx / 2];
+ }
return getData<T>()[idx];
}
* @param[in] idx location
*/
template <typename T = float> T &getValue(unsigned int idx) noexcept {
+ if (getDataType() == Tdatatype::QINT4) {
+ return getData<T>()[idx / 2];
+ }
return getData<T>()[idx];
}
/**
+ * @brief return value at specific location
+ * @param[in] idx location
+ * @retval qint4 value in location
+ */
+ int8_t getValueQint4(unsigned int idx) const noexcept {
+ int8_t value = getData<int8_t>()[idx / 2];
+ return decode_qint(value, (idx % 2 == 0));
+ }
+
+ /**
+ * @brief return value at specific location
+ * @param[in] idx location
+ * @retval qint4 value in location
+ */
+ int8_t getValueQint4(unsigned int idx) noexcept {
+ int8_t value = getData<int8_t>()[idx / 2];
+ return decode_qint(value, (idx % 2 == 0));
+ }
+
+ /**
+ * @brief return value at specific location
+ * @param[in] b batch location
+ * @param[in] c channel location
+ * @param[in] h height location
+ * @param[in] w width location
+ * @retval qint4 value in location
+ */
+ int8_t getValueQint4(unsigned int b, unsigned int c, unsigned int h,
+ unsigned int w) const noexcept {
+ size_t idx = getIndex(b, c, h, w);
+ int8_t value = getData<int8_t>()[idx / 2];
+ return decode_qint(value, (idx % 2 == 0));
+ }
+
+ /**
+ * @brief return value at specific location
+ * @param[in] b batch location
+ * @param[in] c channel location
+ * @param[in] h height location
+ * @param[in] w width location
+ * @retval qint4 value in location
+ */
+ int8_t getValueQint4(unsigned int b, unsigned int c, unsigned int h,
+ unsigned int w) noexcept {
+ size_t idx = getIndex(b, c, h, w);
+ int8_t value = getData<int8_t>()[idx / 2];
+ return decode_qint(value, (idx % 2 == 0));
+ }
+
+ /**
* @brief Get the Value thinking that it is padded
* for example, for the tensor (virtually padded) below,
* getValue(0, 0, 2, 2, 1, 1, .0f) will return 5
if (i > index) {
return nullptr;
}
+ if (getDataType() == Tdatatype::QINT4)
+ return &getData<T>()[i / 2];
return &getData<T>()[i];
}
return nullptr;
}
+ if (getDataType() == Tdatatype::QINT4)
+ return &getData<T>()[i / 2];
return &getData<T>()[i];
}
* @brief Get size of the data in bytes
* @retval size_t Size in bytes
*/
- size_t bytes() const { return size() * dim.getDataTypeSize(); }
+ size_t bytes() const {
+ if (getDataType() == Tdatatype::QINT4) {
+      /* two int4 values are packed into one byte */
+      return (size() * dim.getDataTypeSize() + 1) / 2;
+ }
+ return size() * dim.getDataTypeSize();
+ }
/**
* @brief Set the element value
#endif
} else if (getDataType() == Tdatatype::QINT8) {
getData<int8_t>()[getIndex(batch, c, h, w)] = value;
+ } else if (getDataType() == Tdatatype::QINT4) {
+ int idx = getIndex(batch, c, h, w);
+
+ if (idx % 2 == 0) {
+ getData<int8_t>()[idx / 2] =
+ encode_qint(value, getData<int8_t>()[idx / 2]);
+ } else {
+ getData<int8_t>()[idx / 2] =
+ encode_qint(getData<int8_t>()[idx / 2] >> 4, value);
+ }
}
}
* @brief Set scale factors of the tensor
   * @param[in] scales scale factors
+   * @param[in] idx axis to which the scale factors apply
+   *            (0: batch, 1: channel, 2: height, 3: width)
   */
-  void setScaleFactors(std::vector<float> scales);
+  void setScaleFactors(std::vector<float> scales, int idx);
/**
* @brief Get scale factors of the tensor
std::string name; /**< name of the tensor */
std::shared_ptr<MemoryData> data;
size_t offset;
+  int scale_idx; /**< axis to which the scale factors apply */
std::vector<float> scale_factors;
/**<
*/
Tensor rotate_180(Tensor in);
+ /**
+ * @brief Encode two int4 values to one int8 value
+   * @param[in] high value stored in the upper 4 bits
+   * @param[in] low value stored in the lower 4 bits
+ * @retval Encoded value
+ */
+ int8_t encode_qint(int8_t high, int8_t low) const;
+
+ /**
+   * @brief Decode one int4 value from a packed int8 value
+   * @param[in] val packed int8 value
+   * @param[in] isHigh true to extract the upper 4 bits, false for the lower 4
+   * @retval Decoded value
+ */
+ int8_t decode_qint(int8_t val, bool isHigh) const;
+
}; // namespace nntrainer
/**
return sizeof(float);
case TensorDim::DataType::QINT8:
return sizeof(int8_t);
+ case TensorDim::DataType::QINT4:
+ return sizeof(int8_t);
default:
return sizeof(float);
}
type_ = "FP16";
} else if (d.getDataType() == ml::train::TensorDim::DataType::QINT8) {
type_ = "QINT8";
+ } else if (d.getDataType() == ml::train::TensorDim::DataType::QINT4) {
+ type_ = "QINT4";
}
std::string format_ =
EXPECT_EQ(status, ML_ERROR_NONE);
}
+TEST(nntrainer_Tensor, Tensor_05_p) {
+ int status = ML_ERROR_NONE;
+ std::vector<std::vector<std::vector<int8_t>>> in = {{{-8, -7}, {-6, -5}},
+ {{-4, -3}, {-2, -1}},
+ {{0, 1}, {2, 3}},
+ {{4, 5}, {6, 7}}};
+
+ nntrainer::Tensor tensor = nntrainer::Tensor(
+ in, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4});
+ ASSERT_NE(nullptr, tensor.getData<int8_t>());
+
+ for (size_t b = 0; b < tensor.batch(); ++b) {
+ for (size_t c = 0; c < tensor.channel(); ++c) {
+ for (size_t h = 0; h < tensor.height(); ++h) {
+ for (size_t w = 0; w < tensor.width(); ++w) {
+ size_t idx = tensor.getIndex(b, c, h, w);
+ ASSERT_EQ(idx - 8, tensor.getValueQint4(idx));
+ }
+ }
+ }
+ }
+}
+
+TEST(nntrainer_Tensor, Tensor_06_p) {
+ int status = ML_ERROR_NONE;
+ nntrainer::Tensor tensor = nntrainer::Tensor(
+ 1, 4, 2, 2, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4});
+ ASSERT_NE(nullptr, tensor.getData<int8_t>());
+
+ tensor.setValue(-2);
+
+ for (size_t b = 0; b < tensor.batch(); ++b) {
+ for (size_t c = 0; c < tensor.channel(); ++c) {
+ for (size_t h = 0; h < tensor.height(); ++h) {
+ for (size_t w = 0; w < tensor.width(); ++w) {
+ size_t idx = tensor.getIndex(b, c, h, w);
+ ASSERT_EQ(-2, tensor.getValueQint4(idx));
+ }
+ }
+ }
+ }
+}
+
TEST(nntrainer_Tensor, multiply_i_01_p) {
int status = ML_ERROR_NONE;
int batch = 3;
EXPECT_TRUE(t.isAllocated());
}
+TEST(nntrainer_Tensor, allocate_04_p) {
+ nntrainer::Tensor t(
+ {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}},
+ true);
+ EXPECT_TRUE(t.isAllocated());
+
+ t.allocate();
+ EXPECT_TRUE(t.isAllocated());
+}
+
+TEST(nntrainer_Tensor, allocate_05_p) {
+ nntrainer::Tensor t(
+ {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}},
+ true);
+ EXPECT_TRUE(t.isAllocated());
+
+ t.allocate();
+ EXPECT_TRUE(t.isAllocated());
+}
+
TEST(nntrainer_Tensor, initialize_01_p) {
nntrainer::Tensor t({1, 2, 3, 4}, true, nntrainer::Tensor::Initializer::ONES);
EXPECT_EQ(golden, t);
}
+TEST(nntrainer_Tensor, initialize_09_p) {
+ nntrainer::Tensor t(
+ {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, true,
+ nntrainer::Tensor::Initializer::ONES);
+ nntrainer::Tensor golden(
+ {1, 2, 3, 4, {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}}, true,
+ nntrainer::Tensor::Initializer::ZEROS);
+
+ EXPECT_NE(golden, t);
+
+ golden.initialize(nntrainer::Tensor::Initializer::ONES);
+ EXPECT_EQ(golden, t);
+}
+
TEST(nntrainer_Tensor, split_01_p) {
{
nntrainer::TensorDim ref_dim(3, 2, 4, 5);
nntrainer::Tensor input(batch, channel, height, width);
GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k);
- input.setScaleFactors({1.5, 1.0, 0.5});
+ input.setScaleFactors({1.5, 1.0, 0.5}, 1);
nntrainer::Tensor output(batch, channel, height, width);
batch + 1, channel, height + 1, width + 1,
{nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k);
- input.setScaleFactors({1.5, 1.0, 0.5});
+ input.setScaleFactors({1.5, 1.0, 0.5}, 1);
nntrainer::Tensor output(batch, channel, height, width);
GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k);
EXPECT_THROW(
{
- input.setScaleFactors({2.0, 1.5, 1.0, 0.5});
+ input.setScaleFactors({2.0, 1.5, 1.0, 0.5}, 1);
},
std::invalid_argument);
+
+ EXPECT_NO_THROW({ input.setScaleFactors({2.0, 1.5, 1.0, 0.5}, 2); });
}
/**
batch, channel, height, width,
{nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k);
- input.setScaleFactors({1.5, 1.0, 0.5});
+ input.setScaleFactors({1.5, 1.0, 0.5}, 1);
nntrainer::Tensor output(
batch, channel, height, width,
batch, channel, height, width,
{nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1);
- input.setScaleFactors({1.5, 1.0, 0.5});
+ input.setScaleFactors({1.5, 1.0, 0.5}, 1);
nntrainer::Tensor output;
* @brief dequantize tensor
*/
TEST(nntrainer_Tensor, dequantize_07_p) {
- int batch = 1;
- int channel = 3;
- int height = 4;
- int width = 5;
+ size_t batch = 1;
+ size_t channel = 3;
+ size_t height = 4;
+ size_t width = 5;
nntrainer::Tensor input(
- batch, channel, height, width,
- {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
- GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1);
- input.setScaleFactors({1.5, 1.0, 0.5});
+ {batch,
+ channel,
+ height,
+ width,
+ {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}},
+ true, nntrainer::Tensor::Initializer::ONES);
+ nntrainer::Tensor output(batch, channel, height, width);
+
+ // Dequantize by channel
+ EXPECT_NO_THROW(input.setScaleFactors({-2, 2, 4}, 1));
+ EXPECT_NO_THROW({ input.dequantize(output); });
+
+ float answer_data_1[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4};
+
+ nntrainer::Tensor answer1(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP32}),
+ answer_data_1);
+
+ EXPECT_EQ(output, answer1);
+
+ // Dequantize by height
+ EXPECT_NO_THROW(input.setScaleFactors({-4.2, -2, 2, 4.8}, 2));
+ EXPECT_NO_THROW({ input.dequantize(output); });
+
+ float answer_data_2[] = {
+ -4.2, -4.2, -4.2, -4.2, -4.2, -2, -2, -2, -2, -2, 2, 2,
+ 2, 2, 2, 4.8, 4.8, 4.8, 4.8, 4.8, -4.2, -4.2, -4.2, -4.2,
+ -4.2, -2, -2, -2, -2, -2, 2, 2, 2, 2, 2, 4.8,
+ 4.8, 4.8, 4.8, 4.8, -4.2, -4.2, -4.2, -4.2, -4.2, -2, -2, -2,
+ -2, -2, 2, 2, 2, 2, 2, 4.8, 4.8, 4.8, 4.8, 4.8};
+ nntrainer::Tensor answer2(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP32}),
+ answer_data_2);
+
+ EXPECT_EQ(output, answer2);
+
+ // Dequantize by width
+ EXPECT_NO_THROW(input.setScaleFactors({-4.2, -2, 2, 4, -8}, 3));
+ EXPECT_NO_THROW({ input.dequantize(output); });
+
+ float answer_data_3[] = {
+ -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8,
+ -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8,
+ -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8,
+ -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8};
+
+ nntrainer::Tensor answer3(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP32}),
+ answer_data_3);
+
+ EXPECT_EQ(output, answer3);
+}
+/**
+ * @brief dequantize qint4 tensor to fp32
+ */
+TEST(nntrainer_Tensor, dequantize_08_p) {
+ size_t batch = 1;
+ size_t channel = 3;
+ size_t height = 4;
+ size_t width = 5;
+
+ nntrainer::Tensor input(
+ {batch,
+ channel,
+ height,
+ width,
+ {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}},
+ true, nntrainer::Tensor::Initializer::ONES);
nntrainer::Tensor output(batch, channel, height, width);
+ // Dequantize by channel
+ EXPECT_NO_THROW(input.setScaleFactors({-2, 2, 4}, 1));
EXPECT_NO_THROW({ input.dequantize(output); });
- float answer_data[] = {
- 1.5, 1.5, 1.5, 1.5, 1.5, 3, 3, 3, 3, 3, 4.5, 4.5, 4.5, 4.5, 4.5,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7,
- 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 5.5, 5.5, 5.5, 5.5, 5.5,
- 6, 6, 6, 6, 6, 6.5, 6.5, 6.5, 6.5, 6.5, 7, 7, 7, 7, 7};
+ float answer_data_1[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4};
- nntrainer::Tensor answer(ml::train::TensorDim(batch, channel, height, width,
- {nntrainer::Tformat::NCHW,
- nntrainer::Tdatatype::FP32}),
- answer_data);
+ nntrainer::Tensor answer1(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP32}),
+ answer_data_1);
- EXPECT_EQ(output, answer);
+ EXPECT_EQ(output, answer1);
+
+ // Dequantize by height
+ EXPECT_NO_THROW(input.setScaleFactors({-4.2, -2, 2, 4}, 2));
+ EXPECT_NO_THROW({ input.dequantize(output); });
+
+ float answer_data_2[] = {-4.2, -4.2, -4.2, -4.2, -4.2, -2, -2, -2, -2, -2,
+ 2, 2, 2, 2, 2, 4, 4, 4, 4, 4,
+ -4.2, -4.2, -4.2, -4.2, -4.2, -2, -2, -2, -2, -2,
+ 2, 2, 2, 2, 2, 4, 4, 4, 4, 4,
+ -4.2, -4.2, -4.2, -4.2, -4.2, -2, -2, -2, -2, -2,
+ 2, 2, 2, 2, 2, 4, 4, 4, 4, 4};
+ nntrainer::Tensor answer2(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP32}),
+ answer_data_2);
+
+ EXPECT_EQ(output, answer2);
+
+ // Dequantize by width
+ EXPECT_NO_THROW(input.setScaleFactors({-4.2, -2, 2, 4, -8}, 3));
+ EXPECT_NO_THROW({ input.dequantize(output); });
+
+ float answer_data_3[] = {
+ -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8,
+ -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8,
+ -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8,
+ -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8, -4.2, -2, 2, 4, -8};
+
+ nntrainer::Tensor answer3(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP32}),
+ answer_data_3);
+
+ EXPECT_EQ(output, answer3);
}
int main(int argc, char **argv) {
}
}
+/**
+ * @brief dequantize FP16 tensor
+ */
+TEST(nntrainer_Tensor, dequantize_01_n) {
+ int batch = 1;
+ int channel = 3;
+ int height = 4;
+ int width = 5;
+
+ nntrainer::Tensor input(batch, channel, height, width,
+ nntrainer::Tformat::NCHW, nntrainer::Tdatatype::FP16);
+ GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k);
+ input.setScaleFactors({1.5, 1.0, 0.5}, 1);
+
+ nntrainer::Tensor output(batch, channel, height, width,
+ nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16);
+
+ EXPECT_THROW({ input.dequantize(output); }, std::invalid_argument);
+}
+
+/**
+ * @brief dequantize tensor with different dimension
+ */
+TEST(nntrainer_Tensor, dequantize_02_n) {
+ int batch = 1;
+ int channel = 3;
+ int height = 4;
+ int width = 5;
+
+ nntrainer::Tensor input(
+ batch + 1, channel, height + 1, width + 1,
+ {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
+ GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k);
+ input.setScaleFactors({1.5, 1.0, 0.5}, 1);
+
+ nntrainer::Tensor output(batch, channel, height, width,
+ nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16);
+
+ EXPECT_THROW({ input.dequantize(output); }, std::invalid_argument);
+}
+
+/**
+ * @brief dequantize tensor with no scale factors
+ */
+TEST(nntrainer_Tensor, dequantize_03_n) {
+ int batch = 1;
+ int channel = 3;
+ int height = 4;
+ int width = 5;
+
+ nntrainer::Tensor input(
+ batch, channel, height, width,
+ {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
+ GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k);
+
+ nntrainer::Tensor output(batch, channel, height, width,
+ nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16);
+
+ EXPECT_THROW({ input.dequantize(output); }, std::invalid_argument);
+}
+
+/**
+ * @brief dequantize qint8 tensor to fp16
+ */
+TEST(nntrainer_Tensor, dequantize_04_p) {
+ int batch = 1;
+ int channel = 3;
+ int height = 4;
+ int width = 5;
+
+ nntrainer::Tensor input(
+ batch, channel, height, width,
+ {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
+ GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1);
+ input.setScaleFactors({1.5, 1.0, 0.5}, 1);
+
+ nntrainer::Tensor output;
+
+ EXPECT_NO_THROW({ output = input.dequantize(nntrainer::Tdatatype::FP16); });
+
+ _FP16 answer_data[] = {
+ static_cast<_FP16>(1.5), static_cast<_FP16>(1.5), static_cast<_FP16>(1.5),
+ static_cast<_FP16>(1.5), static_cast<_FP16>(1.5), static_cast<_FP16>(3),
+ static_cast<_FP16>(3), static_cast<_FP16>(3), static_cast<_FP16>(3),
+ static_cast<_FP16>(3), static_cast<_FP16>(4.5), static_cast<_FP16>(4.5),
+ static_cast<_FP16>(4.5), static_cast<_FP16>(4.5), static_cast<_FP16>(4.5),
+ static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6),
+ static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6),
+ static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6),
+ static_cast<_FP16>(6), static_cast<_FP16>(7), static_cast<_FP16>(7),
+ static_cast<_FP16>(7), static_cast<_FP16>(7), static_cast<_FP16>(7),
+ static_cast<_FP16>(8), static_cast<_FP16>(8), static_cast<_FP16>(8),
+ static_cast<_FP16>(8), static_cast<_FP16>(8), static_cast<_FP16>(9),
+ static_cast<_FP16>(9), static_cast<_FP16>(9), static_cast<_FP16>(9),
+ static_cast<_FP16>(9), static_cast<_FP16>(5.5), static_cast<_FP16>(5.5),
+ static_cast<_FP16>(5.5), static_cast<_FP16>(5.5), static_cast<_FP16>(5.5),
+ static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6),
+ static_cast<_FP16>(6), static_cast<_FP16>(6), static_cast<_FP16>(6.5),
+ static_cast<_FP16>(6.5), static_cast<_FP16>(6.5), static_cast<_FP16>(6.5),
+ static_cast<_FP16>(6.5), static_cast<_FP16>(7), static_cast<_FP16>(7),
+ static_cast<_FP16>(7), static_cast<_FP16>(7), static_cast<_FP16>(7)};
+
+ nntrainer::Tensor answer(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16}),
+ answer_data);
+
+ EXPECT_EQ(output, answer);
+}
+
+/**
+ * @brief dequantize qint8 tensor to fp16
+ */
+TEST(nntrainer_Tensor, dequantize_05_p) {
+ size_t batch = 1;
+ size_t channel = 3;
+ size_t height = 4;
+ size_t width = 5;
+
+ nntrainer::Tensor input(
+ {batch,
+ channel,
+ height,
+ width,
+ {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8}},
+ true, nntrainer::Tensor::Initializer::ONES);
+ nntrainer::Tensor output(batch, channel, height, width,
+ nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16);
+
+ // Dequantize by channel
+ EXPECT_NO_THROW(input.setScaleFactors({-2, 2, 4}, 1));
+ EXPECT_NO_THROW({ input.dequantize(output); });
+
+ _FP16 answer_data_1[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4};
+
+ nntrainer::Tensor answer1(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16}),
+ answer_data_1);
+
+ EXPECT_EQ(output, answer1);
+
+ // Dequantize by height
+ EXPECT_NO_THROW(input.setScaleFactors({-4.2, -2, 2, 4.8}, 2));
+ EXPECT_NO_THROW({ input.dequantize(output); });
+
+ _FP16 answer_data_2[] = {static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(2),
+ static_cast<_FP16>(2), static_cast<_FP16>(2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4.8),
+ static_cast<_FP16>(4.8), static_cast<_FP16>(4.8),
+ static_cast<_FP16>(4.8), static_cast<_FP16>(4.8),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(2),
+ static_cast<_FP16>(2), static_cast<_FP16>(2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4.8),
+ static_cast<_FP16>(4.8), static_cast<_FP16>(4.8),
+ static_cast<_FP16>(4.8), static_cast<_FP16>(4.8),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(2),
+ static_cast<_FP16>(2), static_cast<_FP16>(2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4.8),
+ static_cast<_FP16>(4.8), static_cast<_FP16>(4.8),
+ static_cast<_FP16>(4.8), static_cast<_FP16>(4.8)};
+ nntrainer::Tensor answer2(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16}),
+ answer_data_2);
+
+ EXPECT_EQ(output, answer2);
+
+ // Dequantize by width
+ EXPECT_NO_THROW(input.setScaleFactors({-4.2, -2, 2, 4, -8}, 3));
+ EXPECT_NO_THROW({ input.dequantize(output); });
+
+ _FP16 answer_data_3[] = {static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(-8), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(2),
+ static_cast<_FP16>(4), static_cast<_FP16>(-8),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(-8), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(2),
+ static_cast<_FP16>(4), static_cast<_FP16>(-8),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(-8), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(2),
+ static_cast<_FP16>(4), static_cast<_FP16>(-8),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(-8), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(2),
+ static_cast<_FP16>(4), static_cast<_FP16>(-8),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(-8), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(2),
+ static_cast<_FP16>(4), static_cast<_FP16>(-8),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(-8), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(2),
+ static_cast<_FP16>(4), static_cast<_FP16>(-8)};
+
+ nntrainer::Tensor answer3(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16}),
+ answer_data_3);
+
+ EXPECT_EQ(output, answer3);
+}
+
+/**
+ * @brief dequantize qint4 tensor to fp16
+ */
+TEST(nntrainer_Tensor, dequantize_06_p) {
+ size_t batch = 1;
+ size_t channel = 3;
+ size_t height = 4;
+ size_t width = 5;
+
+ nntrainer::Tensor input(
+ {batch,
+ channel,
+ height,
+ width,
+ {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT4}},
+ true, nntrainer::Tensor::Initializer::ONES);
+ nntrainer::Tensor output(batch, channel, height, width,
+ nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16);
+
+ // Dequantize by channel
+ EXPECT_NO_THROW(input.setScaleFactors({-2, 2, 4}, 1));
+ EXPECT_NO_THROW({ input.dequantize(output); });
+
+ _FP16 answer_data_1[] = {-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
+ -2, -2, -2, -2, -2, -2, -2, -2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4};
+
+ nntrainer::Tensor answer1(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16}),
+ answer_data_1);
+
+ EXPECT_EQ(output, answer1);
+
+ // Dequantize by height
+ EXPECT_NO_THROW(input.setScaleFactors({-4.2, -2, 2, 4}, 2));
+ EXPECT_NO_THROW({ input.dequantize(output); });
+
+ _FP16 answer_data_2[] = {static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(2),
+ static_cast<_FP16>(2), static_cast<_FP16>(2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(4), static_cast<_FP16>(4),
+ static_cast<_FP16>(4), static_cast<_FP16>(4),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(2),
+ static_cast<_FP16>(2), static_cast<_FP16>(2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(4), static_cast<_FP16>(4),
+ static_cast<_FP16>(4), static_cast<_FP16>(4),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(2),
+ static_cast<_FP16>(2), static_cast<_FP16>(2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(4), static_cast<_FP16>(4),
+ static_cast<_FP16>(4), static_cast<_FP16>(4)};
+ nntrainer::Tensor answer2(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16}),
+ answer_data_2);
+
+ EXPECT_EQ(output, answer2);
+
+ // Dequantize by width
+ EXPECT_NO_THROW(input.setScaleFactors({-4.2, -2, 2, 4, -8}, 3));
+ EXPECT_NO_THROW({ input.dequantize(output); });
+
+ _FP16 answer_data_3[] = {static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(-8), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(2),
+ static_cast<_FP16>(4), static_cast<_FP16>(-8),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(-8), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(2),
+ static_cast<_FP16>(4), static_cast<_FP16>(-8),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(-8), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(2),
+ static_cast<_FP16>(4), static_cast<_FP16>(-8),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(-8), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(2),
+ static_cast<_FP16>(4), static_cast<_FP16>(-8),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(-8), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(2),
+ static_cast<_FP16>(4), static_cast<_FP16>(-8),
+ static_cast<_FP16>(-4.2), static_cast<_FP16>(-2),
+ static_cast<_FP16>(2), static_cast<_FP16>(4),
+ static_cast<_FP16>(-8), static_cast<_FP16>(-4.2),
+ static_cast<_FP16>(-2), static_cast<_FP16>(2),
+ static_cast<_FP16>(4), static_cast<_FP16>(-8)};
+
+ nntrainer::Tensor answer3(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::FP16}),
+ answer_data_3);
+
+ EXPECT_EQ(output, answer3);
+}
+
GTEST_API_ int main(int argc, char **argv) {
int result = -1;
batch, channel, height, width,
{nntrainer::Tformat::NCHW, nntrainer::Tdatatype::QINT8});
GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k);
- input.setScaleFactors({1.5, 1.0, 0.5});
+ input.setScaleFactors({1.5, 1.0, 0.5}, 1);
nntrainer::Tensor output(
batch, channel, height, width,
batch, channel, height, width,
{nntrainer::Tformat::NHWC, nntrainer::Tdatatype::QINT8});
GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k);
- input.setScaleFactors({1.5, 1.0, 0.5});
+ input.setScaleFactors({1.5, 1.0, 0.5}, 1);
nntrainer::Tensor output(
batch, channel, height, width,
batch, channel, height, width,
{nntrainer::Tformat::NHWC, nntrainer::Tdatatype::QINT8});
GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1);
- input.setScaleFactors({1.5, 1.0, 0.5});
+ input.setScaleFactors({1.5, 1.0, 0.5}, 1);
nntrainer::Tensor output;
output.getDim().setFormat(nntrainer::Tformat::NHWC);
batch, channel, height, width,
{nntrainer::Tformat::NHWC, nntrainer::Tdatatype::QINT8});
GEN_TEST_INPUT(input, i * (batch * height) + j * (width) + k + 1);
- input.setScaleFactors({1.5, 1.0, 0.5});
+ input.setScaleFactors({1.5, 1.0, 0.5}, 1);
nntrainer::Tensor output(
batch, channel, height, width,
EXPECT_EQ(output, answer);
}
+/**
+ * @brief dequantize nhwc qint4 tensor
+ */
+TEST(nntrainer_Tensor, dequantize_05_p) {
+ size_t batch = 1;
+ size_t channel = 10;
+ size_t height = 2;
+ size_t width = 1;
+
+ nntrainer::Tensor input(
+ {batch,
+ channel,
+ height,
+ width,
+ {nntrainer::Tformat::NHWC, nntrainer::Tdatatype::QINT4}},
+ true, nntrainer::Tensor::Initializer::ONES);
+ input.setScaleFactors({-8, -6, -4, -2, -1, 1, 2, 4, 6, 7}, 1);
+ nntrainer::Tensor output;
+
+ EXPECT_NO_THROW({ output = input.dequantize(nntrainer::Tdatatype::FP32); });
+
+ float answer_data[] = {-8, -6, -4, -2, -1, 1, 2, 4, 6, 7,
+ -8, -6, -4, -2, -1, 1, 2, 4, 6, 7};
+
+ nntrainer::Tensor answer(ml::train::TensorDim(batch, channel, height, width,
+ {nntrainer::Tformat::NHWC,
+ nntrainer::Tdatatype::FP32}),
+ answer_data);
+
+ EXPECT_EQ(output, answer);
+}
+
int main(int argc, char **argv) {
int result = -1;
/**
* @brief qint8 tensors reuse fp32 tensor memory space
*/
-TEST(TensorPool, validate_memory_reuse_p) {
+TEST(TensorPool, validate_memory_reuse_01_p) {
// |--------- t1 ---------|
// |-t2-||-t3-||-t4-||-t5-|
nntrainer::TensorPool pool;
}
/**
+ * @brief qint4 tensors reuse fp32 tensor memory space
+ */
+TEST(TensorPool, validate_memory_reuse_02_p) {
+ // |--------- t1 ---------|
+ // |-t2-||-t3-||-t4-||-t5-|
+ nntrainer::TensorPool pool;
+ nntrainer::Tensor *t1 = nullptr, *t2 = nullptr, *t3 = nullptr, *t4 = nullptr,
+ *t5 = nullptr;
+
+ EXPECT_NO_THROW(
+ t1 = pool.request("t1", nntrainer::TensorDim({4}), {0},
+ nntrainer::TensorLifespan::FORWARD_FUNC_LIFESPAN));
+ EXPECT_NE(t1, nullptr);
+ EXPECT_FALSE(t1->isAllocated());
+
+ EXPECT_NO_THROW(
+ t2 = pool.request("t2",
+ nntrainer::TensorDim({8}, {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::QINT4}),
+ {1}, nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN));
+ EXPECT_NE(t2, nullptr);
+ EXPECT_FALSE(t2->isAllocated());
+
+ EXPECT_NO_THROW(
+ t3 = pool.request("t3",
+ nntrainer::TensorDim({7}, {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::QINT4}),
+ {1}, nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN));
+ EXPECT_NE(t3, nullptr);
+ EXPECT_FALSE(t3->isAllocated());
+
+ EXPECT_NO_THROW(
+ t4 = pool.request("t4",
+ nntrainer::TensorDim({8}, {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::QINT4}),
+ {1}, nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN));
+ EXPECT_NE(t4, nullptr);
+ EXPECT_FALSE(t4->isAllocated());
+
+ EXPECT_NO_THROW(
+ t5 = pool.request("t5",
+ nntrainer::TensorDim({7}, {nntrainer::Tformat::NCHW,
+ nntrainer::Tdatatype::QINT4}),
+ {1}, nntrainer::TensorLifespan::BACKWARD_FUNC_LIFESPAN));
+ EXPECT_NE(t5, nullptr);
+ EXPECT_FALSE(t5->isAllocated());
+
+ EXPECT_NO_THROW(pool.finalize(nntrainer::OptimizedV1Planner(), 0, 2));
+ EXPECT_EQ(pool.minMemoryRequirement(), t1->bytes());
+
+ EXPECT_NO_THROW(pool.allocate());
+
+ EXPECT_EQ(t1->getAddress<float>(0), (float *)t2->getAddress<int8_t>(0));
+ EXPECT_EQ(t1->getAddress<float>(1), (float *)t3->getAddress<int8_t>(0));
+ EXPECT_EQ(t1->getAddress<float>(2), (float *)t4->getAddress<int8_t>(0));
+ EXPECT_EQ(t1->getAddress<float>(3), (float *)t5->getAddress<int8_t>(0));
+
+ EXPECT_NO_THROW(pool.deallocate());
+}
+
+/**
* @brief check if data span of two tensor testOverlap
*
* @param t1 tensor1