const auto* qvd = tensor.data<qint8>();
float* rvd = rv.data<float>();
for (auto i = 0; i < tensor.numel(); ++i) {
- rvd[i] = (static_cast<uint32_t>(qvd[i].val_) - zero_point_) * scale_;
+ // We need to convert the qint8 value to float to ensure the subtraction
+ // subexpression returns a float
+ rvd[i] = (static_cast<float>(qvd[i].val_) - zero_point_) * scale_;
}
return rv;
}
qr = r.quantize_linear(scale, zero_point)
self.assertEqual(qr.item(), 1)
+ def test_qtensor_quant_dequant(self):
+ r = np.random.rand(3, 2) * 2 - 4
+ r = torch.from_numpy(r).float()
+ scale = 2
+ zero_point = 2
+ qr = r.quantize_linear(scale, zero_point)
+ rqr = qr.dequantize()
+ print(r.numpy())
+ print(rqr.numpy())
+ self.assertTrue(np.allclose(r.numpy(), rqr.numpy(), atol=2/scale))
+
@unittest.skipIf(torch.cuda.device_count() < 2, 'fewer than 2 GPUs detected')
def test_device_guard(self):
# verify that all operators with `device_guard: False` behave properly with multiple devices.