From f0ee72be8fad66c6916692378292b0a54b0de5a7 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 18 May 2018 17:43:46 -0700
Subject: [PATCH] Make the quantize_and_dequantize op use the full quantized
 range when possible.

PiperOrigin-RevId: 197226707
---
 .../core/kernels/quantize_and_dequantize_op.h      | 75 ++++++++--------------
 .../kernels/quantize_and_dequantize_op_test.cc     | 32 +++++----
 2 files changed, 43 insertions(+), 64 deletions(-)

diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.h b/tensorflow/core/kernels/quantize_and_dequantize_op.h
index 3b09ea2..7ba41b4 100644
--- a/tensorflow/core/kernels/quantize_and_dequantize_op.h
+++ b/tensorflow/core/kernels/quantize_and_dequantize_op.h
@@ -49,56 +49,37 @@ struct QuantizeAndDequantizeOneScaleImpl {
     d.memcpyDeviceToHost(&min_range, input_min.data(), sizeof(T));
     d.memcpyDeviceToHost(&max_range, input_max.data(), sizeof(T));
 
-    // Make sure the range is symmetric for signed quantization, or start from
-    // 0 for unsigned quantization.
-    max_range = std::max(std::abs(max_range), std::abs(min_range));
+    // Calculate the range for the simulated integer quantization:
+    // e.g. [-128,127] for signed = true, num_bits = 8,
+    // or [0, 255] for signed = false, num_bits = 8.
+    const int64 min_quantized = signed_input ? -(1ULL << (num_bits - 1)) : 0;
+    const int64 max_quantized = min_quantized + ((1ULL << num_bits) - 1);
 
-    // If both min and max are 0, then the output should be just 0.
-    if (max_range == 0) {
-      out.device(d) = input.constant(T(0));
-      return;
-    }
-
-    if (signed_input) {
-      min_range = -max_range;
+    // Determine the maximum scaling factor that would scale
+    // [min_range, max_range] to not exceed [min_quantized, max_quantized],
+    // while keeping 0 unchanged.
+    const T scale_from_min_side = (min_quantized * min_range > 0)
+                                      ? min_quantized / min_range
+                                      : std::numeric_limits<T>::max();
+    const T scale_from_max_side = (max_quantized * max_range > 0)
+                                      ? max_quantized / max_range
+                                      : std::numeric_limits<T>::max();
+    auto scale = std::min(scale_from_min_side, scale_from_max_side);
 
-      // If it is signed, we try to keep 0.0 being 0 and drop one bucket. For
-      // example, if it is 8 bits, we have the range [-127, 127]. So for input
-      // range of [-x, x], the scale should be 254/(2*x).
-      T scale = static_cast<T>((uint64_t{1} << (num_bits - 1)) - 1) / max_range;
-      T inverse_scale = T(1.0) / scale;
-      if (range_given) {
-        out.device(d) =
-            ((input.cwiseMin(max_range).cwiseMax(min_range) - min_range) *
-                 scale +
-             T(0.5))
-                    .floor() *
-                inverse_scale +
-            min_range;
-      } else {
-        // No need to compare with min and max as they are measured from the
-        // tensor.
-        out.device(d) =
-            ((input - min_range) * scale + T(0.5)).floor() * inverse_scale +
-            min_range;
-      }
+    T inverse_scale = T(1.0) / scale;
+    if (range_given) {
+      out.device(d) =
+          ((input.cwiseMin(max_range).cwiseMax(min_range) - min_range) * scale +
+           T(0.5))
+              .floor() *
+              inverse_scale +
+          min_range;
     } else {
-      min_range = 0;
-      // If it is unsigned and num_bits == 8, the range with 8 bits is [0, 255].
-      // If the input range is [0, x], then the scale is 255/x instead of 254 as
-      // in the case above.
-      T scale = static_cast<T>((uint64_t{1} << num_bits) - 1) / max_range;
-      T inverse_scale = 1.0 / scale;
-      if (range_given) {
-        out.device(d) =
-            ((input.cwiseMin(max_range).cwiseMax(min_range)) * scale + T(0.5))
-                .floor() *
-                inverse_scale;
-      } else {
-        // No need to compare with min and max as they are measured from the
-        // tensor.
-        out.device(d) = (input * scale + T(0.5)).floor() * inverse_scale;
-      }
+      // No need to compare with min and max as they are measured from the
+      // tensor.
+      out.device(d) =
+          ((input - min_range) * scale + T(0.5)).floor() * inverse_scale +
+          min_range;
     }
   }
 };
diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc b/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc
index e41df12..f9f20d0 100644
--- a/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc
+++ b/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc
@@ -105,13 +105,13 @@ TEST_F(QuantizeAndDequantizeTest, Convert_1D_tensor_with_int8) {
   AddInputFromArray<float>(TensorShape({}), {0.0});  // Min
   AddInputFromArray<float>(TensorShape({}), {0.0});  // Max
 
-  // With int8, the tensor is quantized to {-127, -63, 0, 38, 102, 70}.
-  // Scale is: 1/127
-  // Then it is dequantized to {-1, -63.0/127, 0, 38.0/127, 102.0/127, 70.0/127}
+  // With int8, the tensor is quantized to {-128, -64, 0, 38, 102, 71}.
+  // Scale is: 1/128
+  // Then it is dequantized to {-1, -0.5, 0, 38.0/128, 102.0/128, 71.0/128}
   TF_ASSERT_OK(RunOpKernel());
   Tensor expected(allocator(), DT_FLOAT, TensorShape({6}));
-  test::FillValues<float>(
-      &expected, {-1, -63.0 / 127, 0, 38.0 / 127, 102.0 / 127, 70.0 / 127});
+  test::FillValues<float>(&expected,
+                          {-1, -0.5, 0, 38.0 / 128, 102.0 / 128, 71.0 / 128});
   test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-5);
 
   // Ensure that the inputs haven't been changed.
@@ -136,13 +136,13 @@ TEST_F(QuantizeAndDequantizeTest, Convert_1D_tensor_with_int8_V3) {
   AddInputFromArray<float>(TensorShape({}), {0.0});  // Max
   AddInputFromArray<int32>(TensorShape({}), {8});    // num_bits
 
-  // With int8, the tensor is quantized to {-127, -63, 0, 38, 102, 70}.
-  // Scale is: 1/127
-  // Then it is dequantized to {-1, -63.0/127, 0, 38.0/127, 102.0/127, 70.0/127}
+  // With int8, the tensor is quantized to {-128, -64, 0, 38, 102, 71}.
+  // Scale is: 1/128
+  // Then it is dequantized to {-1, -64.0/128, 0, 38.0/128, 102.0/128, 71.0/128}
   TF_ASSERT_OK(RunOpKernel());
   Tensor expected(allocator(), DT_FLOAT, TensorShape({6}));
-  test::FillValues<float>(
-      &expected, {-1, -63.0 / 127, 0, 38.0 / 127, 102.0 / 127, 70.0 / 127});
+  test::FillValues<float>(&expected,
+                          {-1, -0.5, 0, 38.0 / 128, 102.0 / 128, 71.0 / 128});
   test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-5);
 
   // Ensure that the inputs haven't been changed.
@@ -166,12 +166,11 @@ TEST_F(QuantizeAndDequantizeTest, Convert_1D_tensor_with_int4) {
   AddInputFromArray<float>(TensorShape({}), {0.0});  // Min
   AddInputFromArray<float>(TensorShape({}), {0.0});  // Max
 
-  // With int4, the tensor is quantized to {-7, -3, 0, 2, 6, 4}.
-  // Scale is: 1/7
+  // With int4, the tensor is quantized to {-8, -4, 0, 2, 6, 4}.
+  // Scale is: 1/8
   TF_ASSERT_OK(RunOpKernel());
   Tensor expected(allocator(), DT_FLOAT, TensorShape({6}));
-  test::FillValues<float>(&expected,
-                          {-1, -3.0 / 7, 0, 2.0 / 7, 6.0 / 7, 4.0 / 7});
+  test::FillValues<float>(&expected, {-1, -0.5, 0, 0.25, 0.75, 0.5});
   test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-5);
 
   // Ensure that the inputs haven't been changed.
@@ -196,12 +196,11 @@ TEST_F(QuantizeAndDequantizeTest, Convert_1D_tensor_with_int4_V3) {
   AddInputFromArray<float>(TensorShape({}), {0.0});  // Max
   AddInputFromArray<int32>(TensorShape({}), {4});    // num_bits
 
-  // With int4, the tensor is quantized to {-7, -3, 0, 2, 6, 4}.
-  // Scale is: 1/7
+  // With int4, the tensor is quantized to {-8, -4, 0, 2, 6, 4}.
+  // Scale is: 1/8
   TF_ASSERT_OK(RunOpKernel());
   Tensor expected(allocator(), DT_FLOAT, TensorShape({6}));
-  test::FillValues<float>(&expected,
-                          {-1, -3.0 / 7, 0, 2.0 / 7, 6.0 / 7, 4.0 / 7});
+  test::FillValues<float>(&expected, {-1, -0.5, 0, 0.25, 0.75, 0.5});
   test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-5);
 
   // Ensure that the inputs haven't been changed.
-- 
2.7.4
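
Note: to sanity-check the arithmetic in the updated test comments, here is a
minimal standalone C++ sketch of the new scaling rule. It mirrors the patched
kernel (full quantized range, largest scale that keeps [min_range, max_range]
in bounds while mapping 0.0 to 0) but is illustrative only: the function name,
the use of plain float for T, and the input values {-1, -0.5, 0, 0.3, 0.8,
0.555} (inferred from the quantized values quoted in the test comments) are
assumptions, not part of the patch.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <initializer_list>
#include <iostream>
#include <limits>

// Illustrative sketch of the patched kernel's logic; not TensorFlow code.
float QuantizeAndDequantize(float x, float min_range, float max_range,
                            int num_bits, bool signed_input) {
  // Full simulated integer range, e.g. [-128, 127] for signed, num_bits = 8.
  const int64_t min_quantized = signed_input ? -(1LL << (num_bits - 1)) : 0;
  const int64_t max_quantized = min_quantized + ((1LL << num_bits) - 1);

  // Largest scale that keeps [min_range, max_range] inside
  // [min_quantized, max_quantized] while leaving 0.0 at 0.
  const float scale_from_min_side =
      (min_quantized * min_range > 0) ? min_quantized / min_range
                                      : std::numeric_limits<float>::max();
  const float scale_from_max_side =
      (max_quantized * max_range > 0) ? max_quantized / max_range
                                      : std::numeric_limits<float>::max();
  const float scale = std::min(scale_from_min_side, scale_from_max_side);
  const float inverse_scale = 1.0f / scale;

  // Clamp, quantize to the nearest bucket, then map back to float.
  x = std::min(std::max(x, min_range), max_range);
  return std::floor((x - min_range) * scale + 0.5f) * inverse_scale + min_range;
}

int main() {
  // With the range measured from the tensor, [min_range, max_range] is
  // [-1, 0.8]: scale_from_min_side = -128 / -1 = 128 and
  // scale_from_max_side = 127 / 0.8 = 158.75, so scale = 128, which is why
  // the updated comment reads "Scale is: 1/128" rather than 1/127.
  for (float x : {-1.0f, -0.5f, 0.0f, 0.3f, 0.8f, 0.555f}) {
    std::cout << QuantizeAndDequantize(x, -1.0f, 0.8f, 8, true) << "\n";
  }
  // Prints -1, -0.5, 0, 0.296875 (= 38/128), 0.796875 (= 102/128) and
  // 0.554688 (= 71/128), the expected values in the updated int8 test.
}

Taking the minimum of the two per-side scales is what lets the op use the full
range (e.g. [-128, 127] instead of the symmetric [-127, 127]) whenever one side
of the input range has slack, which is where the new quantized values
{-128, -64, 0, 38, 102, 71} come from.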