From 7555534be3c6138cbcca138556fe4dbf4cc6b8ce Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Tue, 27 Mar 2018 00:30:02 -0700
Subject: [PATCH] Handle out of range values when casting from floating point
 to integer in quantize.

PiperOrigin-RevId: 190580805
---
 .../lite/kernels/internal/quantization_util.h      |  69 ++++++++++-
 .../kernels/internal/quantization_util_test.cc     | 126 +++++++++++++++++++++
 .../lite/toco/graph_transformations/quantize.cc    |   7 +-
 3 files changed, 195 insertions(+), 7 deletions(-)

diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util.h b/tensorflow/contrib/lite/kernels/internal/quantization_util.h
index f7706c7..9a04b76 100644
--- a/tensorflow/contrib/lite/kernels/internal/quantization_util.h
+++ b/tensorflow/contrib/lite/kernels/internal/quantization_util.h
@@ -97,6 +97,71 @@ QuantizationParams ChooseQuantizationParams(double rmin, double rmax) {
   return quantization_params;
 }
 
+// Converts a floating-point number to an integer. For all inputs x where
+// static_cast<IntOut>(x) is legal according to the C++ standard, the result
+// is identical to that cast (i.e. the result is x with its fractional part
+// truncated whenever that is representable as IntOut).
+//
+// static_cast would cause undefined behavior for the following cases, which
+// have well-defined behavior for this function:
+//
+// 1. If x is NaN, the result is zero.
+//
+// 2. If the truncated form of x is above the representable range of IntOut,
+// the result is std::numeric_limits<IntOut>::max().
+//
+// 3. If the truncated form of x is below the representable range of IntOut,
+// the result is std::numeric_limits<IntOut>::min().
+//
+// Note that cases #2 and #3 cover infinities as well as finite numbers.
+//
+// The range of FloatIn must include the range of IntOut, otherwise
+// the results are undefined.
+// TODO(sfeuz): Replace by absl::SafeCast once available.
+template <class IntOut, class FloatIn>
+IntOut SafeCast(FloatIn x) {
+  static_assert(!std::numeric_limits<FloatIn>::is_integer,
+                "FloatIn is integer");
+  static_assert(std::numeric_limits<IntOut>::is_integer,
+                "IntOut is not integer");
+  static_assert(std::numeric_limits<IntOut>::radix == 2, "IntOut is base 2");
+
+  // Special case NaN, for which the logic below doesn't work.
+  if (std::isnan(x)) {
+    return 0;
+  }
+
+  // Negative values all clip to zero for unsigned results.
+  if (!std::numeric_limits<IntOut>::is_signed && x < 0) {
+    return 0;
+  }
+
+  // Handle infinities.
+  if (std::isinf(x)) {
+    return x < 0 ? std::numeric_limits<IntOut>::min()
+                 : std::numeric_limits<IntOut>::max();
+  }
+
+  // Set exp such that x == f * 2^exp for some f with |f| in [0.5, 1.0),
+  // unless x is zero in which case exp == 0. Note that this implies that the
+  // magnitude of x is strictly less than 2^exp.
+  int exp = 0;
+  std::frexp(x, &exp);
+
+  // Let N be the number of non-sign bits in the representation of IntOut. If
+  // the magnitude of x is strictly less than 2^N, the truncated version of x
+  // is representable as IntOut. The only representable integer for which this
+  // is not the case is kMin for signed types (i.e. -2^N), but that is covered
+  // by the fall-through below.
+  if (exp <= std::numeric_limits<IntOut>::digits) {
+    return x;
+  }
+
+  // Handle numbers with magnitude >= 2^N.
+  return x < 0 ? std::numeric_limits<IntOut>::min()
+               : std::numeric_limits<IntOut>::max();
+}
+
 // Decompose a double multiplier into a Q0.31 int32 representation of its
 // significand, and shift representation of NEGATIVE its exponent ---
 // this is intended as a RIGHT-shift.
@@ -135,8 +200,8 @@ void PreprocessSoftmaxScaling(double beta, double input_scale,
 // Calculate the largest input that will result in a within-bounds intermediate
 // result within MultiplyByQuantizedMultiplierGreaterThanOne. In other words,
 // it must not overflow before we reduce the value by multiplication by the
-// input multiplier. The negative radius is used as the minimum difference
-// in Softmax.
+// input multiplier. The negative radius is used as the minimum difference in
+// Softmax.
 int CalculateInputRadius(int input_integer_bits, int input_left_shift);
 
 }  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc b/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc
index 4ae2085..3e9a3c2 100644
--- a/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc
+++ b/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc
@@ -22,6 +22,132 @@ namespace {
 
 using ::testing::Pair;
 
+template <class FloatIn, class IntOut>
+void RunSafeCastTests() {
+  const IntOut imax = std::numeric_limits<IntOut>::max();
+  EXPECT_GT(imax, 0);
+  const IntOut imin = std::numeric_limits<IntOut>::min();
+  const bool s = std::numeric_limits<IntOut>::is_signed;
+  if (s) {
+    EXPECT_LT(imin, 0);
+  } else {
+    EXPECT_EQ(0, imin);
+  }
+
+  // Some basic tests.
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(0.0)), 0);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(-0.0)), 0);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(0.99)), 0);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(1.0)), 1);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(1.01)), 1);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(1.99)), 1);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(2.0)), 2);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(2.01)), 2);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(-0.99)), 0);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(-1.0)), s ? -1 : 0);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(-1.01)), s ? -1 : 0);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(-1.99)), s ? -1 : 0);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(-2.0)), s ? -2 : 0);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(-2.01)), s ? -2 : 0);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(117.9)), 117);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(118.0)), 118);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(118.1)), 118);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(-117.9)), s ? -117 : 0);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(-118.0)), s ? -118 : 0);
+  EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(-118.1)), s ? -118 : 0);
+
+  // Some edge cases.
+  EXPECT_EQ(SafeCast<IntOut>(std::numeric_limits<FloatIn>::max()), imax);
+  EXPECT_EQ(SafeCast<IntOut>(std::numeric_limits<FloatIn>::lowest()), imin);
+  EXPECT_EQ(SafeCast<IntOut>(std::numeric_limits<FloatIn>::infinity()), imax);
+  EXPECT_EQ(SafeCast<IntOut>(-std::numeric_limits<FloatIn>::infinity()), imin);
+  EXPECT_EQ(SafeCast<IntOut>(std::numeric_limits<FloatIn>::quiet_NaN()), 0);
+
+  // Some larger numbers.
+  if (sizeof(IntOut) >= 4 && sizeof(FloatIn) > 4) {
+    EXPECT_EQ(SafeCast<IntOut>(static_cast<FloatIn>(0x76543210)), 0x76543210);
+  }
+
+  if (sizeof(FloatIn) > sizeof(IntOut)) {
+    // Check values near imax.
+    EXPECT_EQ(SafeCast<IntOut>(
+                  static_cast<FloatIn>(static_cast<FloatIn>(imax) + 0.1)),
+              imax);
+    EXPECT_EQ(SafeCast<IntOut>(
+                  static_cast<FloatIn>(static_cast<FloatIn>(imax) + 0.99)),
+              imax);
+    EXPECT_EQ(SafeCast<IntOut>(
+                  static_cast<FloatIn>(static_cast<FloatIn>(imax) + 1.0)),
+              imax);
+    EXPECT_EQ(SafeCast<IntOut>(
+                  static_cast<FloatIn>(static_cast<FloatIn>(imax) + 1.99)),
+              imax);
+    EXPECT_EQ(SafeCast<IntOut>(
+                  static_cast<FloatIn>(static_cast<FloatIn>(imax) + 2.0)),
+              imax);
+    EXPECT_EQ(SafeCast<IntOut>(
+                  static_cast<FloatIn>(static_cast<FloatIn>(imax) - 0.1)),
+              imax - 1);
+    EXPECT_EQ(SafeCast<IntOut>(
+                  static_cast<FloatIn>(static_cast<FloatIn>(imax) - 0.99)),
+              imax - 1);
+    EXPECT_EQ(SafeCast<IntOut>(
+                  static_cast<FloatIn>(static_cast<FloatIn>(imax) - 1.0)),
+              imax - 1);
+    EXPECT_EQ(SafeCast<IntOut>(
+                  static_cast<FloatIn>(static_cast<FloatIn>(imax) - 1.01)),
+              imax - 2);
+    EXPECT_EQ(SafeCast<IntOut>(
+                  static_cast<FloatIn>(static_cast<FloatIn>(imax) - 1.99)),
+              imax - 2);
+    EXPECT_EQ(SafeCast<IntOut>(
+                  static_cast<FloatIn>(static_cast<FloatIn>(imax) - 2.0)),
+              imax - 2);
+    EXPECT_EQ(SafeCast<IntOut>(
+                  static_cast<FloatIn>(static_cast<FloatIn>(imax) - 2.01)),
+              imax - 3);
+  }
+
+  // Check values considerably larger in magnitude than imin and imax
+  EXPECT_EQ(
+      SafeCast<IntOut>(static_cast<FloatIn>(static_cast<FloatIn>(imax) * 2)),
+      imax);
+  EXPECT_EQ(
+      SafeCast<IntOut>(static_cast<FloatIn>(static_cast<FloatIn>(imax) * 20)),
+      imax);
+  EXPECT_EQ(
+      SafeCast<IntOut>(static_cast<FloatIn>(static_cast<FloatIn>(imax) * 100)),
+      imax);
+  EXPECT_EQ(
+      SafeCast<IntOut>(static_cast<FloatIn>(static_cast<FloatIn>(imin) * 2)),
+      imin);
+  EXPECT_EQ(
+      SafeCast<IntOut>(static_cast<FloatIn>(static_cast<FloatIn>(imin) * 20)),
+      imin);
+  EXPECT_EQ(
+      SafeCast<IntOut>(static_cast<FloatIn>(static_cast<FloatIn>(imin) * 100)),
+      imin);
+}
+
+TEST(QuantizationUtilTest, SafeCast) {
+  RunSafeCastTests<float, int8>();
+  RunSafeCastTests<double, int8>();
+  RunSafeCastTests<float, int16>();
+  RunSafeCastTests<double, int16>();
+  RunSafeCastTests<float, int32>();
+  RunSafeCastTests<double, int32>();
+  RunSafeCastTests<float, int64>();
+  RunSafeCastTests<double, int64>();
+  RunSafeCastTests<float, uint8>();
+  RunSafeCastTests<double, uint8>();
+  RunSafeCastTests<float, uint16>();
+  RunSafeCastTests<double, uint16>();
+  RunSafeCastTests<float, uint32>();
+  RunSafeCastTests<double, uint32>();
+  RunSafeCastTests<float, uint64>();
+  RunSafeCastTests<double, uint64>();
+}
+
 // Example taken from http://www.tensorflow.org/performance/quantization
 //
 // Quantized | Float
diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
index ad3f052..9679ea0 100644
--- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
+++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc
@@ -65,8 +65,6 @@ std::unique_ptr<GenericBuffer> QuantizeBuffer(
       static_cast<const Buffer<ArrayDataType::kFloat>&>(buffer);
   auto* quantized_buffer = new Buffer<A>;
   quantized_buffer->data.resize(float_buffer.data.size());
-  const auto qmin = static_cast<double>(std::numeric_limits<DataType<A>>::min());
-  const auto qmax = static_cast<double>(std::numeric_limits<DataType<A>>::max());
   for (std::size_t i = 0; i < float_buffer.data.size(); i++) {
     const float src_val = float_buffer.data[i];
     double scaled_val;  // Astonishingly, using 'float' degrades accuracy just
@@ -78,9 +76,8 @@ std::unique_ptr<GenericBuffer> QuantizeBuffer(
     } else {
       scaled_val = quantization_params.zero_point + inverse_scale * src_val;
     }
-    const auto rounded_val = static_cast<double>(std::round(scaled_val));
-    const auto clamped_val = std::min(qmax, std::max(qmin, rounded_val));
-    quantized_buffer->data[i] = static_cast<DataType<A>>(clamped_val);
+    quantized_buffer->data[i] =
+        tflite::SafeCast<DataType<A>>(std::round(scaled_val));
   }
   return std::unique_ptr<GenericBuffer>(quantized_buffer);
 }
-- 
2.7.4