From 3d428b1427b8f1541892b41cf3b6681bdd8f486a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 22 May 2018 01:01:01 -0700
Subject: [PATCH] Automated g4 rollback of changelist 197487461

PiperOrigin-RevId: 197523867
---
 .../core/kernels/quantize_and_dequantize_op.h      | 89 +++++++++++-----------
 .../kernels/quantize_and_dequantize_op_test.cc     | 46 +++++------
 2 files changed, 69 insertions(+), 66 deletions(-)

diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op.h b/tensorflow/core/kernels/quantize_and_dequantize_op.h
index 906d507..3b09ea2 100644
--- a/tensorflow/core/kernels/quantize_and_dequantize_op.h
+++ b/tensorflow/core/kernels/quantize_and_dequantize_op.h
@@ -23,8 +23,6 @@ limitations under the License.
 namespace tensorflow {
 namespace functor {
 
-// TODO(pauldonnelly): 'signed_input' should really be called 'signed_output'.
-
 template <typename Device, typename T>
 struct QuantizeAndDequantizeOneScaleFunctor {
   void operator()(const Device& d, typename TTypes<T>::ConstVec input,
@@ -51,51 +49,56 @@ struct QuantizeAndDequantizeOneScaleImpl {
     d.memcpyDeviceToHost(&min_range, input_min.data(), sizeof(T));
     d.memcpyDeviceToHost(&max_range, input_max.data(), sizeof(T));
 
-    // Calculate the range for the simulated integer quantization:
-    // e.g. [-128,127] for signed = true, num_bits = 8,
-    // or [0, 255] for signed = false, num_bits = 8.
-    const int64 min_quantized = signed_input ? -(1ULL << (num_bits - 1)) : 0;
-    const int64 max_quantized = min_quantized + ((1ULL << num_bits) - 1);
-
-    // Determine the maximum scaling factor that would scale
-    // [min_range, max_range] to not exceed [min_quantized, max_quantized],
-    // while keeping 0 unchanged.
-    const T scale_from_min_side = (min_quantized * min_range > 0)
-                                      ? min_quantized / min_range
-                                      : std::numeric_limits<T>::max();
-    const T scale_from_max_side = (max_quantized * max_range > 0)
-                                      ? max_quantized / max_range
-                                      : std::numeric_limits<T>::max();
+    // Make sure the range is symmetric for signed quantization, or starts
+    // from 0 for unsigned quantization.
+    max_range = std::max(std::abs(max_range), std::abs(min_range));
 
-    // Note: Avoids changing the side of the range that determines scale.
-    T scale, inverse_scale;
-    if (scale_from_min_side < scale_from_max_side) {
-      scale = scale_from_min_side;
-      inverse_scale = min_range / min_quantized;
-      max_range = max_quantized * inverse_scale;
-    } else {
-      scale = scale_from_max_side;
-      inverse_scale = max_range / max_quantized;
-      min_range = min_quantized * inverse_scale;
+    // If both min and max are 0, then the output should be just 0.
+    if (max_range == 0) {
+      out.device(d) = input.constant(T(0));
+      return;
     }
 
-    if (range_given) {
-      // Note: The clamping here is to avoid overflow in the quantized type.
-      // The semantics of the op does not guarantee to clamp to the specified
-      // min_range and max_range - because we may have changed either min_range
-      // or max_range.
-      out.device(d) =
-          ((input.cwiseMin(max_range).cwiseMax(min_range) - min_range) * scale +
-           T(0.5))
-                  .floor() *
-              inverse_scale +
-          min_range;
+    if (signed_input) {
+      min_range = -max_range;
+
+      // If it is signed, we try to keep 0.0 mapped to 0 and drop one bucket.
+      // For example, with 8 bits we use the range [-127, 127]. So for an
+      // input range of [-x, x], the scale should be 254/(2*x).
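+      // Illustrative numbers (assumed for this comment, not taken from any
+      // test): with num_bits = 8 and max_range = 2.0, scale = 127 / 2.0 =
+      // 63.5, so an input of 1.0 quantizes to floor(1.0 * 63.5 + 0.5) = 64
+      // and dequantizes to 64 * (2.0 / 127) ~= 1.008.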
+      T scale = static_cast<T>((uint64_t{1} << (num_bits - 1)) - 1) / max_range;
+      T inverse_scale = T(1.0) / scale;
+      if (range_given) {
+        out.device(d) =
+            ((input.cwiseMin(max_range).cwiseMax(min_range) - min_range) *
+                 scale +
+             T(0.5))
+                .floor() *
+                inverse_scale +
+            min_range;
+      } else {
+        // No need to compare with min and max as they are measured from the
+        // tensor.
+        out.device(d) =
+            ((input - min_range) * scale + T(0.5)).floor() * inverse_scale +
+            min_range;
+      }
     } else {
-      // No need to clamp to min_range and max_range in this case as they were
-      // measured from the tensor.
-      out.device(d) =
-          ((input - min_range) * scale + T(0.5)).floor() * inverse_scale +
-          min_range;
+      min_range = 0;
+      // If it is unsigned and num_bits == 8, the quantized range is [0, 255].
+      // So for an input range of [0, x], the scale is 255/x rather than
+      // 254/(2*x) as in the signed case above.
+      T scale = static_cast<T>((uint64_t{1} << num_bits) - 1) / max_range;
+      T inverse_scale = T(1.0) / scale;
+      if (range_given) {
+        out.device(d) =
+            ((input.cwiseMin(max_range).cwiseMax(min_range)) * scale + T(0.5))
+                .floor() *
+                inverse_scale;
+      } else {
+        // No need to compare with min and max as they are measured from the
+        // tensor.
+        out.device(d) = (input * scale + T(0.5)).floor() * inverse_scale;
+      }
     }
   }
 };
diff --git a/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc b/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc
index 629c698..e41df12 100644
--- a/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc
+++ b/tensorflow/core/kernels/quantize_and_dequantize_op_test.cc
@@ -105,13 +105,13 @@ TEST_F(QuantizeAndDequantizeTest, Convert_1D_tensor_with_int8) {
   AddInputFromArray<float>(TensorShape({}), {0.0});  // Min
   AddInputFromArray<float>(TensorShape({}), {0.0});  // Max
 
-  // With int8, the tensor is quantized to {-128, -64, 0, 38, 102, 71}.
+  // With int8, the tensor is quantized to {-127, -63, 0, 38, 102, 70}.
   // Scale is: 1/127
-  // Then it is dequantized to {-1, -0.5, 0, 38.0/128, 102.0/128, 71.0/128}
+  // Then it is dequantized to {-1, -63.0/127, 0, 38.0/127, 102.0/127, 70.0/127}
   TF_ASSERT_OK(RunOpKernel());
   Tensor expected(allocator(), DT_FLOAT, TensorShape({6}));
-  test::FillValues<float>(&expected,
-                          {-1, -0.5, 0, 38.0 / 128, 102.0 / 128, 71.0 / 128});
+  test::FillValues<float>(
+      &expected, {-1, -63.0 / 127, 0, 38.0 / 127, 102.0 / 127, 70.0 / 127});
   test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-5);
 
   // Ensure that the inputs haven't been changed.
@@ -136,13 +136,13 @@ TEST_F(QuantizeAndDequantizeTest, Convert_1D_tensor_with_int8_V3) {
   AddInputFromArray<float>(TensorShape({}), {0.0});  // Max
   AddInputFromArray<int32>(TensorShape({}), {8});    // num_bits
 
-  // With int8, the tensor is quantized to {-128, -64, 0, 38, 102, 71}.
-  // Scale is: 1/128
-  // Then it is dequantized to {-1, -64.0/128, 0, 38.0/128, 102.0/128, 71.0/128}
+  // With int8, the tensor is quantized to {-127, -63, 0, 38, 102, 70}.
+  // Scale is: 1/127
+  // Then it is dequantized to {-1, -63.0/127, 0, 38.0/127, 102.0/127, 70.0/127}
   TF_ASSERT_OK(RunOpKernel());
   Tensor expected(allocator(), DT_FLOAT, TensorShape({6}));
-  test::FillValues<float>(&expected,
-                          {-1, -0.5, 0, 38.0 / 128, 102.0 / 128, 71.0 / 128});
+  test::FillValues<float>(
+      &expected, {-1, -63.0 / 127, 0, 38.0 / 127, 102.0 / 127, 70.0 / 127});
   test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-5);
 
   // Ensure that the inputs haven't been changed.
@@ -166,11 +166,12 @@ TEST_F(QuantizeAndDequantizeTest, Convert_1D_tensor_with_int4) {
   AddInputFromArray<float>(TensorShape({}), {0.0});  // Min
   AddInputFromArray<float>(TensorShape({}), {0.0});  // Max
 
-  // With int4, the tensor is quantized to {-8, -4, 0, 2, 6, 4}.
-  // Scale is: 1/8
+  // With int4, the tensor is quantized to {-7, -3, 0, 2, 6, 4}.
+  // Scale is: 1/7
   TF_ASSERT_OK(RunOpKernel());
   Tensor expected(allocator(), DT_FLOAT, TensorShape({6}));
-  test::FillValues<float>(&expected, {-1, -0.5, 0, 0.25, 0.75, 0.5});
+  test::FillValues<float>(&expected,
+                          {-1, -3.0 / 7, 0, 2.0 / 7, 6.0 / 7, 4.0 / 7});
   test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-5);
 
   // Ensure that the inputs haven't been changed.
@@ -195,11 +196,12 @@ TEST_F(QuantizeAndDequantizeTest, Convert_1D_tensor_with_int4_V3) {
   AddInputFromArray<float>(TensorShape({}), {0.0});  // Max
   AddInputFromArray<int32>(TensorShape({}), {4});    // num_bits
 
-  // With int4, the tensor is quantized to {-8, -4, 0, 2, 6, 4}.
-  // Scale is: 1/8
+  // With int4, the tensor is quantized to {-7, -3, 0, 2, 6, 4}.
+  // Scale is: 1/7
   TF_ASSERT_OK(RunOpKernel());
   Tensor expected(allocator(), DT_FLOAT, TensorShape({6}));
-  test::FillValues<float>(&expected, {-1, -0.5, 0, 0.25, 0.75, 0.5});
+  test::FillValues<float>(&expected,
+                          {-1, -3.0 / 7, 0, 2.0 / 7, 6.0 / 7, 4.0 / 7});
   test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-5);
 
   // Ensure that the inputs haven't been changed.
@@ -226,14 +228,13 @@ TEST_F(QuantizeAndDequantizeTest, Convert_2D_tensor_with_int8_range_given) {
   AddInputFromArray<float>(TensorShape({}), {1.0});  // Max
 
   // Note that the range is given as [-1, 1].
-  // With int8, the tensor is quantized to {-102, -63, 0, 38, 102, 70, -128,
+  // With int8, the tensor is quantized to {-102, -63, 0, 38, 102, 70, -127,
   // 127}.
   // Scale is: 1/127
 
   TF_ASSERT_OK(RunOpKernel());
   Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 4}));
-  test::FillValues<float>(
-      &expected, {-102.0 / 127, -63.0 / 127, 0, 38.0 / 127, 102.0 / 127,
-                  70.0 / 127, -128.0 / 127, 1});
+  test::FillValues<float>(&expected, {-102.0 / 127, -63.0 / 127, 0, 38.0 / 127,
+                                      102.0 / 127, 70.0 / 127, -1, 1});
   test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-5);
 }
@@ -257,14 +258,13 @@ TEST_F(QuantizeAndDequantizeTest, Convert_2D_tensor_with_int8_range_given_V3) {
   AddInputFromArray<int32>(TensorShape({}), {8});  // num_bits
 
   // Note that the range is given as [-1, 1].
-  // With int8, the tensor is quantized to {-102, -63, 0, 38, 102, 70, -128,
+  // With int8, the tensor is quantized to {-102, -63, 0, 38, 102, 70, -127,
   // 127}.
   // Scale is: 1/127
 
   TF_ASSERT_OK(RunOpKernel());
   Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 4}));
-  test::FillValues<float>(
-      &expected, {-102.0 / 127, -63.0 / 127, 0, 38.0 / 127, 102.0 / 127,
-                  70.0 / 127, -128.0 / 127, 1});
+  test::FillValues<float>(&expected, {-102.0 / 127, -63.0 / 127, 0, 38.0 / 127,
+                                      102.0 / 127, 70.0 / 127, -1, 1});
   test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-5);
 }
-- 
2.7.4
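As a sanity check on the rounding behavior this rollback restores, the short
standalone program below re-derives the values asserted in the updated tests.
It is an illustrative sketch, not code from TensorFlow: the function name
QuantizeAndDequantizeOneScale, the simplified float-only signature, and the
sample input vector (chosen to be consistent with the quantized values quoted
in the test comments) are all assumptions of this example.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors the patched one-scale logic for T = float with range_given == false
// (the range is measured from the tensor).
std::vector<float> QuantizeAndDequantizeOneScale(
    const std::vector<float>& input, int num_bits, bool signed_input) {
  float max_range = 0.0f;
  for (float v : input) max_range = std::max(max_range, std::abs(v));
  // If both min and max are 0, the output is all zeros.
  if (max_range == 0.0f) return std::vector<float>(input.size(), 0.0f);

  // Signed drops one bucket, e.g. [-127, 127] for 8 bits; unsigned uses the
  // full range, e.g. [0, 255].
  const float num_levels =
      signed_input ? static_cast<float>((uint64_t{1} << (num_bits - 1)) - 1)
                   : static_cast<float>((uint64_t{1} << num_bits) - 1);
  const float scale = num_levels / max_range;
  const float inverse_scale = 1.0f / scale;

  std::vector<float> out;
  out.reserve(input.size());
  for (float v : input) {
    // floor(x + 0.5) is the kernel's round-half-up. The kernel's shift by
    // min_range cancels out here because min_range * scale is an exact
    // integer (0 when unsigned, -num_levels when signed).
    out.push_back(std::floor(v * scale + 0.5f) * inverse_scale);
  }
  return out;
}

int main() {
  // Expected (per the test comments): quantized {-127, -63, 0, 38, 102, 70},
  // i.e. {-1, -63/127, 0, 38/127, 102/127, 70/127} after dequantization.
  for (float v : QuantizeAndDequantizeOneScale(
           {-1.0f, -0.5f, 0.0f, 0.3f, 0.8f, 0.555f}, /*num_bits=*/8,
           /*signed_input=*/true)) {
    std::cout << v << '\n';
  }
  return 0;
}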