From 9f332ea94b21aff6a73089db4d8e147748fb6ff6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 23 May 2018 22:33:53 -0700 Subject: [PATCH] Add unit tests to tflite kernels PiperOrigin-RevId: 197842122 --- tensorflow/contrib/lite/kernels/internal/BUILD | 86 ++++++ .../kernels/internal/depthwiseconv_float_test.cc | 162 ++++++++++ .../internal/depthwiseconv_quantized_test.cc | 330 ++++++++++++++++++++ .../lite/kernels/internal/log_quantized_test.cc | 333 +++++++++++++++++++++ .../kernels/internal/logsoftmax_quantized_test.cc | 241 +++++++++++++++ .../kernels/internal/resize_bilinear_float_test.cc | 102 +++++++ .../kernels/internal/softmax_quantized_test.cc | 227 ++++++++++++++ .../contrib/lite/kernels/internal/test_util.cc | 121 ++++++++ .../contrib/lite/kernels/internal/test_util.h | 104 +++++++ tensorflow/contrib/lite/kernels/internal/types.h | 1 + 10 files changed, 1707 insertions(+) create mode 100644 tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/log_quantized_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/resize_bilinear_float_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/test_util.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/test_util.h diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index aabbb06..0a5223b 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -420,6 +420,15 @@ cc_library( }), ) +cc_library( + name = "test_util", + srcs = ["test_util.cc"], + hdrs = ["test_util.h"], + deps = [ + ":types", + ], +) + 
cc_test( name = "tensor_utils_test", srcs = ["tensor_utils_test.cc"], @@ -440,6 +449,83 @@ cc_test( ], ) +cc_test( + name = "depthwiseconv_float_test", + srcs = ["depthwiseconv_float_test.cc"], + deps = [ + ":optimized_base", + ":reference_base", + ":test_util", + ":types", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "depthwiseconv_quantized_test", + srcs = ["depthwiseconv_quantized_test.cc"], + deps = [ + ":optimized_base", + ":reference_base", + ":test_util", + ":types", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "resize_bilinear_float_test", + srcs = ["resize_bilinear_float_test.cc"], + deps = [ + ":optimized_base", + ":reference_base", + ":test_util", + ":types", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "softmax_quantized_test", + timeout = "long", + srcs = [ + "softmax_quantized_test.cc", + ], + deps = [ + ":optimized_base", + ":quantization_util", + ":reference_base", + ":test_util", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "logsoftmax_quantized_test", + timeout = "long", + srcs = [ + "logsoftmax_quantized_test.cc", + ], + tags = ["tflite_not_portable"], + deps = [ + ":optimized_base", + ":quantization_util", + ":reference_base", + ":test_util", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "log_quantized_test", + srcs = ["log_quantized_test.cc"], + deps = [ + ":optimized_base", + ":reference_base", + "@com_google_googletest//:gtest_main", + ], +) + cc_library( name = "cpu_check", hdrs = [ diff --git a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc new file mode 100644 index 0000000..844ee6a --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc @@ -0,0 +1,162 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include + +#include +#include "tensorflow/contrib/lite/kernels/internal/test_util.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h" + +namespace tflite { +namespace { + +// Runs the DepthwiseConv and compares against the reference implementation. 
+template +void TestOneDepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride, int pad_width, int pad_height, + int depth_multiplier, const Dims<4>& output_dims) { + const int output_buffer_size = RequiredBufferSizeForDims(output_dims); + std::vector output_data(output_buffer_size); + std::vector reference_output_data(output_buffer_size); + reference_ops::DepthwiseConv(input_data, input_dims, filter_data, + filter_dims, bias_data, bias_dims, stride, + pad_width, pad_height, depth_multiplier, + reference_output_data.data(), output_dims); + optimized_ops::DepthwiseConv(input_data, input_dims, filter_data, + filter_dims, bias_data, bias_dims, stride, + pad_width, pad_height, depth_multiplier, + output_data.data(), output_dims); + double sum_abs_diff = 0; + float max_abs_val = 0; + for (int i = 0; i < output_buffer_size; i++) { + sum_abs_diff += std::abs(output_data[i] - reference_output_data[i]); + max_abs_val = std::max(max_abs_val, std::abs(reference_output_data[i])); + } + if (sum_abs_diff != 0.f) { + const float mean_diff = + static_cast(sum_abs_diff / output_buffer_size); + const float relative_error = std::abs(mean_diff) / max_abs_val; + ASSERT_LT(relative_error, 1e-5f); + } +} + +void TestOneDepthwiseConv(FusedActivationFunctionType Ac, + const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride, int pad_width, int pad_height, + int depth_multiplier, const Dims<4>& output_dims) { +#define TOCO_HANDLE_CASE(AC_TYPE) \ + if (AC_TYPE == Ac) { \ + TestOneDepthwiseConv(input_data, input_dims, filter_data, \ + filter_dims, bias_data, bias_dims, stride, \ + pad_width, pad_height, depth_multiplier, \ + output_dims); \ + return; \ + } + TOCO_HANDLE_CASE(FusedActivationFunctionType::kNone) + 
TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu) + TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu1) + TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu6) +#undef TOCO_HANDLE_CASE +} + +// This function picks some random DepthwiseConv params, which may or may not +// be legal. If they're not legal, it returns false. If they're legal, +// it runs the DepthwiseConv test and returns true. This allows the caller +// to loop until a test has been run. +bool TryTestOneDepthwiseConv() { + // We have to pick a lot of positive values, where we are particularly + // interested in small values because they are most likely to be special + // cases in optimized implementations, and secondarily because they allow + // tests to run fast, which means we can run more tests and get more + // coverage. + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = ExponentialRandomPositiveInt(0.9f, 6, 50); + const int input_width = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int filter_width = ExponentialRandomPositiveInt(0.9f, 4, 10); + const int filter_height = ExponentialRandomPositiveInt(0.9f, 4, 10); + const int depth_multiplier = ExponentialRandomPositiveInt(0.8f, 6, 50); + const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const int output_depth = input_depth * depth_multiplier; + // The optimized DepthwiseConv implementation currently uses a fixed-size + // accumulator buffer on the stack, with that size. This currently means + // that it does not support larger output depths. It CHECK's for it, + // so it's safe in the sense that if a larger output depth was encountered, + // it would explicitly fail. We just need to adjust our testing to that + // constraint. 
+ const int kMaxSupportedOutputDepth = 1024; + if (output_depth > kMaxSupportedOutputDepth) { + return false; + } + const auto ac = RandomElement(std::vector( + {FusedActivationFunctionType::kNone, FusedActivationFunctionType::kRelu, + FusedActivationFunctionType::kRelu6, + FusedActivationFunctionType::kRelu1})); + Dims<4> input_dims_inference = + MakeDimsForInference(input_depth, input_width, input_height, batch); + Dims<4> output_dims_inference; + int pad_width, pad_height; + const auto padding_type = + UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid; + if (!ComputeConvSizes(input_dims_inference, output_depth, filter_width, + filter_height, stride, padding_type, + &output_dims_inference, &pad_width, &pad_height)) { + return false; + } + Dims<4> filter_dims_inference = + MakeDimsForInference(output_depth, filter_width, filter_height, 1); + Dims<4> bias_dims_inference = MakeDimsForInference(output_depth, 1, 1, 1); + const int input_buffer_size = RequiredBufferSizeForDims(input_dims_inference); + const int filter_buffer_size = + RequiredBufferSizeForDims(filter_dims_inference); + std::vector input_data(input_buffer_size); + std::vector filter_data(filter_buffer_size); + std::vector bias_data(output_depth); + const float input_amplitude = 1.f; + const float filter_amplitude = 1.f; + const float bias_amplitude = + filter_width * filter_height * input_amplitude * filter_amplitude; + FillRandom(&input_data, -input_amplitude, input_amplitude); + FillRandom(&filter_data, -filter_amplitude, filter_amplitude); + FillRandom(&bias_data, -bias_amplitude, bias_amplitude); + TestOneDepthwiseConv(ac, input_data.data(), input_dims_inference, + filter_data.data(), filter_dims_inference, + bias_data.data(), bias_dims_inference, stride, pad_width, + pad_height, depth_multiplier, output_dims_inference); + return true; +} + +void TestOneDepthwiseConv() { + while (!TryTestOneDepthwiseConv()) { + } +} + +TEST(TestDepthwiseConv, TestDepthwiseConv) { + const int 
kTestsToRun = 100 * 1000; + for (int i = 0; i < kTestsToRun; i++) { + TestOneDepthwiseConv(); + } +} +} // namespace +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc new file mode 100644 index 0000000..2c0fc84 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc @@ -0,0 +1,330 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "tensorflow/contrib/lite/kernels/internal/test_util.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK +#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h" + +namespace tflite { +namespace { + +// Runs the DepthwiseConv and compares against the reference implementation. 
+template +int TestOneDepthwiseConvWithGivenOutputShift( + const std::uint8_t* input_data, const Dims<4>& input_dims, + std::int32_t input_offset, const std::uint8_t* filter_data, + const Dims<4>& filter_dims, std::int32_t filter_offset, + const std::int32_t* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + std::int32_t output_offset, std::int32_t output_multiplier, + int output_shift, std::int32_t output_activation_min, + std::int32_t output_activation_max, const Dims<4>& output_dims) { + const int output_buffer_size = RequiredBufferSizeForDims(output_dims); + std::vector output_data(output_buffer_size); + std::vector reference_output_data(output_buffer_size); + reference_ops::DepthwiseConv( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, pad_width, pad_height, + depth_multiplier, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, + reference_output_data.data(), output_dims); + optimized_ops::DepthwiseConv( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, pad_width, pad_height, + depth_multiplier, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data.data(), + output_dims); + int saturated_min = 0; + int saturated_max = 0; + std::vector diff(output_buffer_size); + std::int64_t sum_diff = 0; + std::int64_t sum_abs_diff = 0; + for (int i = 0; i < output_buffer_size; i++) { + diff[i] = static_cast(output_data[i]) - + static_cast(reference_output_data[i]); + sum_diff += diff[i]; + sum_abs_diff += std::abs(diff[i]); + saturated_min += output_data[i] == output_activation_min; + saturated_max += output_data[i] == output_activation_max; + } + // These stats help understand test failures. 
+ std::sort(std::begin(diff), std::end(diff)); + const int min_diff = diff.front(); + const int max_diff = diff.back(); + const int median_diff = diff[diff.size() / 2]; + const float mean_diff = static_cast(sum_diff) / output_buffer_size; + const float mean_abs_diff = + static_cast(sum_abs_diff) / output_buffer_size; + // Normally we should require bit-for-bit exact results. Unfortunately a bug + // in the Intel arm_neon_sse.h translation header that we use for x86 tests + // causes 1-bit inaccuracy in + // the vqrdmulh_n_s32 intrinsic, which causes off-by-1 errors in quantized + // DepthwiseConv ops. So we have to live with a few off-by-one errors for now, + // yet still ensure that no more than a small minority of values are wrong. + EXPECT_TRUE(std::abs(mean_diff) < 1e-5f && mean_abs_diff < 1e-5f && + std::abs(median_diff) == 0 && std::abs(min_diff) <= 1 && + std::abs(max_diff) <= 1); + if (saturated_min > 2 * saturated_max) { + return -1; + } + if (saturated_max > 2 * saturated_min) { + return 1; + } + return 0; +} + +// The point of this function is that we can't practically know which +// output_shift value to pass to test DepthwiseConv. It's not easy to guess (we +// could do some +// statistics for large size, but they would be fragile at smaller sizes), and +// guessing wrong would mean that all the values get saturated so the test +// becomes +// vacuous. So we just bisect our way to reasonable output_shift values. 
+template +void TestOneDepthwiseConvBisectOutputShift( + const std::uint8_t* input_data, const Dims<4>& input_dims, + std::int32_t input_offset, const std::uint8_t* filter_data, + const Dims<4>& filter_dims, std::int32_t filter_offset, + const std::int32_t* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + std::int32_t output_offset, std::int32_t output_multiplier, + int output_activation_bisect_start, int output_activation_bisect_end, + std::int32_t output_activation_min, std::int32_t output_activation_max, + const Dims<4>& output_dims) { + ASSERT_LT(output_activation_bisect_start, output_activation_bisect_end) + << "Bisection failed ?!?!"; + int output_shift_bisect_midpoint = + (output_activation_bisect_start + output_activation_bisect_end) / 2; + int bisect_result = TestOneDepthwiseConvWithGivenOutputShift( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, pad_width, pad_height, + depth_multiplier, output_offset, output_multiplier, + output_shift_bisect_midpoint, output_activation_min, + output_activation_max, output_dims); + // Note: the comparison in the call above is an EXPECT_TRUE, which records a + // mismatch without aborting; reaching this point does not prove success. + if (bisect_result == 0) { + // The result isn't particularly saturated on one or the other side. + // All good, we're done. + return; + } + if (output_activation_bisect_start == output_activation_bisect_end - 1) { + // There is still some saturation on one side, but the bisection is + // finished anyways. We're done; nothing more we can do about it. This + // happens + // in particular when using an activation with a narrow range. + return; + } + // Continue bisecting the output_shift range based on the present result. + int new_output_activation_bisect_start = bisect_result == 1 + ? output_shift_bisect_midpoint + : output_activation_bisect_start; + int new_output_activation_bisect_end = bisect_result == 1 + ? 
output_activation_bisect_end + : output_shift_bisect_midpoint; + TestOneDepthwiseConvBisectOutputShift( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, pad_width, pad_height, + depth_multiplier, output_offset, output_multiplier, + new_output_activation_bisect_start, new_output_activation_bisect_end, + output_activation_min, output_activation_max, output_dims); +} + +template +void TestOneDepthwiseConv( + const std::uint8_t* input_data, const Dims<4>& input_dims, + std::int32_t input_offset, const std::uint8_t* filter_data, + const Dims<4>& filter_dims, std::int32_t filter_offset, + const std::int32_t* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + std::int32_t output_offset, std::int32_t output_multiplier, + std::int32_t output_activation_min, std::int32_t output_activation_max, + const Dims<4>& output_dims) { + TestOneDepthwiseConvBisectOutputShift( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, pad_width, pad_height, + depth_multiplier, output_offset, output_multiplier, 0, 32, + output_activation_min, output_activation_max, output_dims); +} + +void TestOneDepthwiseConv( + FusedActivationFunctionType Ac, const std::uint8_t* input_data, + const Dims<4>& input_dims, std::int32_t input_offset, + const std::uint8_t* filter_data, const Dims<4>& filter_dims, + std::int32_t filter_offset, const std::int32_t* bias_data, + const Dims<4>& bias_dims, int stride, int pad_width, int pad_height, + int depth_multiplier, std::int32_t output_offset, + std::int32_t output_multiplier, std::int32_t output_activation_min, + std::int32_t output_activation_max, const Dims<4>& output_dims) { +#define TOCO_HANDLE_CASE(AC_TYPE) \ + if (AC_TYPE == Ac) { \ + TestOneDepthwiseConv( \ + input_data, input_dims, input_offset, filter_data, filter_dims, \ + filter_offset, bias_data, bias_dims, stride, pad_width, 
pad_height, \ + depth_multiplier, output_offset, output_multiplier, \ + output_activation_min, output_activation_max, output_dims); \ + return; \ + } + TOCO_HANDLE_CASE(FusedActivationFunctionType::kNone) + TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu) + TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu1) + TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu6) +#undef TOCO_HANDLE_CASE +} + +bool TryTestDepthwiseConv(int batch, int input_depth, int input_width, + int input_height, int filter_width, int filter_height, + int depth_multiplier, int stride, + PaddingType padding_type) { + const int output_depth = input_depth * depth_multiplier; + // The optimized DepthwiseConv implementation currently uses a fixed-size + // accumulator buffer on the stack, with that size. This currently means + // that it does not support larger output depths. It CHECK's for it, + // so it's safe in the sense that if a larger output depth was encountered, + // it would explicitly fail. We just need to adjust our testing to that + // constraint. 
+ const int kMaxSupportedOutputDepth = 1024; + if (output_depth > kMaxSupportedOutputDepth) { + return false; + } + const auto ac = RandomElement(std::vector( + {FusedActivationFunctionType::kNone, FusedActivationFunctionType::kRelu, + FusedActivationFunctionType::kRelu6, + FusedActivationFunctionType::kRelu1})); + int output_activation_min = 0; + int output_activation_max = 255; + if (ac != FusedActivationFunctionType::kNone && UniformRandomInt(0, 1)) { + output_activation_min = UniformRandomInt(0, 50); + output_activation_max = UniformRandomInt(200, 255); + } + const std::int32_t output_multiplier = + UniformRandomInt(1 << 29, std::numeric_limits::max()); + const std::int32_t input_offset = UniformRandomInt(-256, 0); + const std::int32_t filter_offset = UniformRandomInt(-256, 0); + const std::int32_t output_offset = UniformRandomInt(-256, 0); + Dims<4> input_dims_inference = + MakeDimsForInference(input_depth, input_width, input_height, batch); + Dims<4> output_dims_inference; + int pad_width, pad_height; + if (!ComputeConvSizes(input_dims_inference, output_depth, filter_width, + filter_height, stride, padding_type, + &output_dims_inference, &pad_width, &pad_height)) { + return false; + } + Dims<4> filter_dims_inference = + MakeDimsForInference(output_depth, filter_width, filter_height, 1); + Dims<4> bias_dims_inference = MakeDimsForInference(output_depth, 1, 1, 1); + const int input_buffer_size = RequiredBufferSizeForDims(input_dims_inference); + const int filter_buffer_size = + RequiredBufferSizeForDims(filter_dims_inference); + std::vector input_data(input_buffer_size); + std::vector filter_data(filter_buffer_size); + std::vector bias_data(output_depth); + FillRandom(&input_data); + FillRandom(&filter_data); + FillRandom(&bias_data, -10000, 10000); + TestOneDepthwiseConv(ac, input_data.data(), input_dims_inference, + input_offset, filter_data.data(), filter_dims_inference, + filter_offset, bias_data.data(), bias_dims_inference, + stride, pad_width, pad_height, 
depth_multiplier, + output_offset, output_multiplier, output_activation_min, + output_activation_max, output_dims_inference); + return true; +} + +// This function picks some random DepthwiseConv params, which may or may not +// be legal. If they're not legal, it returns false. If they're legal, +// it runs the DepthwiseConv test and returns true. This allows the caller +// to loop until a test has been run. +bool TryTestOneDepthwiseConv() { + // We have to pick a lot of positive values, where we are particularly + // interested in small values because they are most likely to be special + // cases in optimized implementations, and secondarily because they allow + // tests to run fast, which means we can run more tests and get more + // coverage. + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = ExponentialRandomPositiveInt(0.9f, 6, 50); + const int input_width = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int filter_width = ExponentialRandomPositiveInt(0.9f, 4, 10); + const int filter_height = ExponentialRandomPositiveInt(0.9f, 4, 10); + const int depth_multiplier = ExponentialRandomPositiveInt(0.8f, 6, 50); + const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const auto padding_type = + UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid; + + return TryTestDepthwiseConv(batch, input_depth, input_width, input_height, + filter_width, filter_height, depth_multiplier, + stride, padding_type); +} + +// Tests parameters for the 3x3 filter kernel. 
+bool TryTestOneDepthwiseConv3x3Filter() { + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = 8 * ExponentialRandomPositiveInt(0.9f, 10, 50); + const int input_width = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int filter_width = 3; + const int filter_height = 3; + const int depth_multiplier = 1; + const int stride = UniformRandomInt(1, 2); + // Although the kernel supports only kValid padding, we test that kSame + // is using the correct code path. + const auto padding_type = + UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid; + + return TryTestDepthwiseConv(batch, input_depth, input_width, input_height, + filter_width, filter_height, depth_multiplier, + stride, padding_type); +} + +void TestOneDepthwiseConv() { + while (!TryTestOneDepthwiseConv()) { + } +} + +void TestOneDepthwiseConv3x3Filter() { + while (!TryTestOneDepthwiseConv3x3Filter()) { + } +} + +TEST(TestDepthwiseConv, TestDepthwiseConv) { + const int kTestsToRun = 10 * 1000; + for (int i = 0; i < kTestsToRun; i++) { + TestOneDepthwiseConv(); + } +} + +TEST(TestDepthwiseConv3x3Filter, TestDepthwiseConv) { + const int kTestsToRun = 3 * 1000; + for (int i = 0; i < kTestsToRun; i++) { + TestOneDepthwiseConv3x3Filter(); + } +} + +} // namespace +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/log_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/log_quantized_test.cc new file mode 100644 index 0000000..7e9ff52 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/log_quantized_test.cc @@ -0,0 +1,333 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GEMMLOWP_ENABLE_FIXEDPOINT_CONSTANTS_CHECKS + +#include +#include +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" + +namespace { + +class NumberGenerator { + public: + std::vector RandomIntVector(int n, int min_val, int max_val) { + std::vector vec(n); + double scale = static_cast(max_val + 1 - min_val) / engine_.max(); + for (auto& it : vec) { + it = min_val + std::floor(engine_() * scale); + } + return vec; + } + + std::mt19937 engine_; +}; + +class LogQuantizedTest : public ::testing::Test { + public: + NumberGenerator generator_; +}; + +// input_integer_bits <= 30. output_integer_bits > 0. 
+inline int32 LogPositiveValuesViaFloat(int32 input_val, int input_integer_bits, + int output_integer_bits) { + const double float_log_sum_of_exps = std::log( + static_cast(input_val) * 0.5 / (1 << (30 - input_integer_bits))); + static constexpr double min_int = + static_cast(std::numeric_limits::min()); + static constexpr double max_int = + static_cast(std::numeric_limits::max()); + double double_result = tflite::TfLiteRound(float_log_sum_of_exps * + (1 << (31 - output_integer_bits))); + return static_cast( + std::min(max_int, std::max(min_int, double_result))); +} + +void CheckOutputData(const std::vector& test_output, + const std::vector& reference_output, + const std::vector& test_input, + const string& check_label, int input_integer_bits, + int output_integer_bits, int tolerance) { + // In the special case of small input, specifically raw value of 5, a rounding + // up leads to difference in the output. We do not aim to be accurate for + // very small input values, and there should be sufficient input fractional + // bits that this is a small input. + static constexpr double error_from_rounding_up = 0.0224585; + const int n = test_output.size(); + ASSERT_EQ(n, reference_output.size()); + for (int i = 0; i < n; ++i) { + // Adjust tolerance when input <= 5*2^-(31-input_integer_bits). + const int adjusted_tolerance = + test_input[i] > 5 + ? 
+ tolerance + : std::max(tolerance, static_cast(std::ceil( + error_from_rounding_up * + (1 << (31 - output_integer_bits))))); + ASSERT_LE(std::abs(test_output[i] - reference_output[i]), + adjusted_tolerance) + << "Failure in \"" << check_label << "\" at i=" << i + << ", test_input[i]=" << test_input[i] << "=" + << static_cast(test_input[i]) / (1 << (31 - input_integer_bits)) + << ", test_output[i]=" << test_output[i] << "=" + << static_cast(test_output[i]) / + (1 << (31 - output_integer_bits)) + << ", reference_output[i]=" << reference_output[i] << "=" + << static_cast(reference_output[i]) / + (1 << (31 - output_integer_bits)) + << ", difference[i]=" << std::abs(reference_output[i] - test_output[i]) + << "=" + << static_cast(std::abs(reference_output[i] - test_output[i])) / + (1 << (31 - output_integer_bits)) + << "; tolerance=" << tolerance + << ", adj tolerance=" << adjusted_tolerance; + } +} + +void RightShiftVector(const std::vector& shifts, + std::vector* vec) { + const int n = vec->size(); + ASSERT_EQ(n, shifts.size()); + for (int i = 0; i < n; ++i) { + vec->at(i) = std::max(1, vec->at(i) >> shifts[i]); + } +} + +template +void RunSingleTest(const std::vector& test_input, + const string& check_label, int tolerance) { + const int n = test_input.size(); + std::vector float_gen_output(n, 0); + std::vector reference_output(n, 0); + std::vector optimized_output(n, 0); + + // Inputs are clamped to at least 2 before they are evaluated below. + // Original rationale: __builtin_clz(0u) may equal 31 instead of 32. 
+ std::vector fudged_input(n, 0); + for (int i = 0; i < n; ++i) { + fudged_input[i] = std::max(test_input[i], 2); + } + + for (int i = 0; i < n; ++i) { + reference_output[i] = + tflite::reference_ops::log_x_for_x_greater_than_or_equal_to_1_impl< + OutputIntegerBits, InputIntegerBits>( + gemmlowp::FixedPoint::FromRaw( + fudged_input[i])) + .raw(); + optimized_output[i] = + tflite::optimized_ops::log_x_for_x_greater_than_or_equal_to_1_impl< + OutputIntegerBits, InputIntegerBits>( + gemmlowp::FixedPoint::FromRaw( + fudged_input[i])) + .raw(); + float_gen_output[i] = LogPositiveValuesViaFloat( + fudged_input[i], InputIntegerBits, OutputIntegerBits); + } + // Note that first check is intolerant. + { + std::ostringstream label; + label << check_label << " / optimized vs reference / InputIntegerBits=" + << InputIntegerBits << ", OutputIntegerBits=" << OutputIntegerBits; + CheckOutputData( + optimized_output, reference_output, test_input, label.str(), + InputIntegerBits, OutputIntegerBits, 0); + } + { + std::ostringstream label; + label << check_label << " / reference vs float-gen / InputIntegerBits=" + << InputIntegerBits << ", OutputIntegerBits=" << OutputIntegerBits; + CheckOutputData( + reference_output, float_gen_output, test_input, label.str(), + InputIntegerBits, OutputIntegerBits, tolerance); + } + { + std::ostringstream label; + label << check_label << " optimized vs float-gen / InputIntegerBits=" + << InputIntegerBits << ", OutputIntegerBits=" << OutputIntegerBits; + CheckOutputData( + optimized_output, float_gen_output, test_input, label.str(), + InputIntegerBits, OutputIntegerBits, tolerance); + } +} + +template +void RunSingleTest(const std::vector& test_input, int input_integer_bits, + const string& check_label, int tolerance) { +#define INPUT_CASE(K) \ + case K: \ + return RunSingleTest(test_input, check_label, \ + tolerance) + switch (input_integer_bits) { + INPUT_CASE(0); + INPUT_CASE(1); + INPUT_CASE(2); + INPUT_CASE(3); + INPUT_CASE(4); + INPUT_CASE(5); 
+ INPUT_CASE(6); + INPUT_CASE(7); + INPUT_CASE(8); + INPUT_CASE(9); + INPUT_CASE(10); + INPUT_CASE(11); + INPUT_CASE(12); + INPUT_CASE(13); + INPUT_CASE(14); + INPUT_CASE(15); + INPUT_CASE(16); + INPUT_CASE(17); + INPUT_CASE(18); + INPUT_CASE(19); + INPUT_CASE(20); + INPUT_CASE(21); + INPUT_CASE(22); + INPUT_CASE(23); + INPUT_CASE(24); + INPUT_CASE(25); + INPUT_CASE(26); + INPUT_CASE(27); + INPUT_CASE(28); + INPUT_CASE(29); + default: + ASSERT_LE(input_integer_bits, 30) + << "Input integer bits not handled: " << input_integer_bits; + } +#undef INPUT_CASE +} + +// Dispatches on the runtime value of |output_integer_bits|: cases 0..29 forward +// |test_input|, |input_integer_bits|, |check_label| and |tolerance| to the +// RunSingleTest call emitted by OUTPUT_CASE. Any other value reaches the +// default branch, which fails the test when the value is above 30. +void RunSingleTest(const std::vector& test_input, int input_integer_bits, + int output_integer_bits, const string& check_label, + int tolerance) { +#define OUTPUT_CASE(K) \ + case K: \ + return RunSingleTest(test_input, input_integer_bits, check_label, \ + tolerance) + switch (output_integer_bits) { + OUTPUT_CASE(0); + OUTPUT_CASE(1); + OUTPUT_CASE(2); + OUTPUT_CASE(3); + OUTPUT_CASE(4); + OUTPUT_CASE(5); + OUTPUT_CASE(6); + OUTPUT_CASE(7); + OUTPUT_CASE(8); + OUTPUT_CASE(9); + OUTPUT_CASE(10); + OUTPUT_CASE(11); + OUTPUT_CASE(12); + OUTPUT_CASE(13); + OUTPUT_CASE(14); + OUTPUT_CASE(15); + OUTPUT_CASE(16); + OUTPUT_CASE(17); + OUTPUT_CASE(18); + OUTPUT_CASE(19); + OUTPUT_CASE(20); + OUTPUT_CASE(21); + OUTPUT_CASE(22); + OUTPUT_CASE(23); + OUTPUT_CASE(24); + OUTPUT_CASE(25); + OUTPUT_CASE(26); + OUTPUT_CASE(27); + OUTPUT_CASE(28); + OUTPUT_CASE(29); + default: + // This switch is keyed on the output bits, so that is the value that must + // be checked and named in the failure message. + ASSERT_LE(output_integer_bits, 30) + << "Output integer bits not handled: " << output_integer_bits; + } +#undef OUTPUT_CASE +} + +void RunUniformTest(int test_size, int input_integer_bits, + int output_integer_bits, const string& check_label, + int tolerance, NumberGenerator* generator) { + std::vector test_data = generator->RandomIntVector( + test_size, 2, std::numeric_limits::max() - 1); + test_data[0] = 2; + test_data[1] = 3; + test_data[2] = 4; + test_data[3] = std::numeric_limits::max() - 2; + test_data[4] = std::numeric_limits::max() - 1; +
test_data[5] = std::numeric_limits::max(); + + RunSingleTest(test_data, input_integer_bits, output_integer_bits, + check_label + " / uniform test", tolerance); +} + +void RunUniformShiftUniformTest(int test_size, int input_integer_bits, + int output_integer_bits, + const string& check_label, int tolerance, + NumberGenerator* generator) { + std::vector test_data = generator->RandomIntVector( + test_size, 2, std::numeric_limits::max() - 1); + std::vector shifts = generator->RandomIntVector(test_size, 0, 29); + RightShiftVector(shifts, &test_data); + + RunSingleTest(test_data, input_integer_bits, output_integer_bits, + check_label + " / shifted test", tolerance); +} + +TEST_F(LogQuantizedTest, VariedIntegerBits) { + static constexpr int kVariations = 250; + static constexpr int kRunSize = 250; + static constexpr int kIntegerTolerance = 8; + static constexpr double kOutputFloatTolerance = 7.0e-7; + + std::vector input_integer_bits = + generator_.RandomIntVector(kVariations, 0, 24); + std::vector output_integer_bits = + generator_.RandomIntVector(kVariations, 1, 10); + + for (int i = 0; i < kVariations; ++i) { + int var_output_integer_bits = output_integer_bits[i]; + int tolerance = + std::max(1.0 * kIntegerTolerance, + (1 << (31 - var_output_integer_bits)) * kOutputFloatTolerance); + + RunUniformTest(kRunSize, input_integer_bits[i], var_output_integer_bits, + "VariedIntegerBits", tolerance, &generator_); + RunUniformShiftUniformTest(kRunSize, input_integer_bits[i], + var_output_integer_bits, "VariedIntegerBits", + tolerance, &generator_); + } +} + +TEST_F(LogQuantizedTest, SelectedIntegerBits) { + static constexpr int kInputBits = 12; + static constexpr int kOutputBits = 5; + static constexpr int kRunSize = 100000; + static constexpr int kIntegerTolerance = 4; + + RunUniformTest(kRunSize, kInputBits, kOutputBits, "SelectedIntegerBits", + kIntegerTolerance, &generator_); + RunUniformShiftUniformTest(kRunSize, kInputBits, kOutputBits, + "SelectedIntegerBits", 
kIntegerTolerance, + &generator_); +} + +} // namespace diff --git a/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc new file mode 100644 index 0000000..b7531ea --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc @@ -0,0 +1,241 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/test_util.h" + +namespace tflite { +namespace { + +void RunLogSoftmaxFloatReference(const uint8* input_data, + const Dims<4>& dims_common, int32 input_offset, + const double input_scale, int stride, + float beta, uint8* reference_output_data) { + const int ref_buffer_size = RequiredBufferSizeForDims(dims_common); + std::vector reference_dequant_data(ref_buffer_size); + std::vector reference_output_float_data(ref_buffer_size); + + // Reference data generated via Dequant of input into float, and then applying + // float LogSoftmax. 
+ reference_ops::Dequantize(input_data, dims_common, input_offset, input_scale, + reference_dequant_data.data(), dims_common); + optimized_ops::LogSoftmax(reference_dequant_data.data(), dims_common, + reference_output_float_data.data(), dims_common); + // Work with quantized scaling for LogSoftmax, under which 255 represents 0, + // and -16 gets nudged up to 0. + for (int i = 0; i < ref_buffer_size; i++) { + reference_output_data[i] = std::max( + 0, static_cast( + 255 + std::round(16.0f * reference_output_float_data[i]))); + } +} + +void CheckOutputData(const uint8* test_output, const uint8* reference_output, + const Dims<4>& dims_common, const string& check_label, + bool be_exacting) { + const int buffer_size = RequiredBufferSizeForDims(dims_common); + // While calculating some metrics in floating point, we work with quantized + // scaling. + std::vector diff(buffer_size); + int64_t sum_diff = 0; + int64_t sum_abs_diff = 0; + for (int i = 0; i < buffer_size; i++) { + diff[i] = static_cast(test_output[i]) - reference_output[i]; + sum_diff += diff[i]; + sum_abs_diff += std::abs(diff[i]); + } + // These stats help understand test failures. + std::sort(std::begin(diff), std::end(diff)); + const int min_diff = diff.front(); + const int max_diff = diff.back(); + const int median_diff = diff[diff.size() / 2]; + const float mean_diff = static_cast(sum_diff) / buffer_size; + const float mean_abs_diff = static_cast(sum_abs_diff) / buffer_size; + // We either check for bit exactness (against the reference quantized version) + // or for general accuracy, allowing off-by-one (against the float reference). + if (be_exacting) { + ASSERT_TRUE(std::abs(min_diff) == 0 && std::abs(max_diff) == 0) + << check_label << ": " + << "std::abs(min_diff)=" << std::abs(min_diff) + << ", std::abs(max_diff)=" << std::abs(max_diff); + } else { + // For small numbers of samples, the estimates of the means vary more. + // Rather than widen the tolerances, we skip the smaller tests. 
+ ASSERT_TRUE(((std::abs(mean_diff) < 2e-2f && mean_abs_diff < 3e-2f) || + buffer_size < 10000) && + std::abs(median_diff) == 0 && std::abs(min_diff) <= 1 && + std::abs(max_diff) <= 1) + << check_label << ": " + << "buffer_size=" << buffer_size << ", mean_diff=" << mean_diff + << ", mean_abs_diff=" << mean_abs_diff + << ", median_diff=" << median_diff << ", min_diff=" << min_diff + << ", max_diff=" << max_diff; + } +} + +// Runs the LogSoftmax and compares against the float reference implementation +// and the quantized reference implementation. +void RunOneLogSoftmaxTest(const uint8* input_data, const Dims<4>& dims_common, + int32 input_offset, const double input_scale, + int stride, float beta) { + const int buffer_size = RequiredBufferSizeForDims(dims_common); + std::vector optimized_logsoftmax_output(buffer_size); + std::vector reference_float_logsoftmax_output(buffer_size); + std::vector reference_quant_logsoftmax_output(buffer_size); + + RunLogSoftmaxFloatReference(input_data, dims_common, input_offset, + input_scale, stride, beta, + reference_float_logsoftmax_output.data()); + + int32 input_beta_multiplier; + int input_beta_left_shift; + int32 reverse_scaling_divisor; + int reverse_scaling_right_shift; + static const int kScaledDiffIntegerBits = 5; + tflite::PreprocessLogSoftmaxScaling( + beta, input_scale, kScaledDiffIntegerBits, &input_beta_multiplier, + &input_beta_left_shift, &reverse_scaling_divisor, + &reverse_scaling_right_shift); + // diff_min has a negative value, and is used to limit the maximum magnitude + // of the diffs, which are <= 0. 
+ const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits, + input_beta_left_shift); + + optimized_ops::LogSoftmax(input_data, dims_common, input_beta_multiplier, + input_beta_left_shift, reverse_scaling_divisor, + reverse_scaling_right_shift, diff_min, + optimized_logsoftmax_output.data(), dims_common); + reference_ops::LogSoftmax( + input_data, dims_common, input_beta_multiplier, input_beta_left_shift, + reverse_scaling_divisor, reverse_scaling_right_shift, diff_min, + reference_quant_logsoftmax_output.data(), dims_common); + + CheckOutputData(optimized_logsoftmax_output.data(), + reference_float_logsoftmax_output.data(), dims_common, + "Optimized vs float reference", false); + CheckOutputData(optimized_logsoftmax_output.data(), + reference_quant_logsoftmax_output.data(), dims_common, + "Optimized vs quant reference", true); + CheckOutputData(reference_quant_logsoftmax_output.data(), + reference_float_logsoftmax_output.data(), dims_common, + "Quant reference vs float reference", false); +} + +// This function picks some random LogSoftmax params, which are checked for +// desirability. If not acceptable, it returns false. If they're OK, +// it runs the LogSoftmax test and returns true. This allows the caller +// to loop until a test has been run. +// +// Currently we do not reject for any reason. +bool TryOneUniformLogSoftmax() { + // We pick mostly positive values, on the whole emphasizing smaller values and + // therefore faster tests. We test a wider range of depths. In the case of + // LogSoftmax, the width and height really just create test repetitions. 
+ const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = ExponentialRandomPositiveInt(0.75f, 175, 500); + const int input_width = ExponentialRandomPositiveInt(0.8f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.8f, 20, 200); + const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const double input_scale = std::pow(10.0, UniformRandomFloat(-2.0, 1.0)); + const int32 input_offset = UniformRandomInt(-256, 0); + static constexpr float beta = 1.0f; + + Dims<4> dims_common = + MakeDimsForInference(input_depth, input_width, input_height, batch); + const int buffer_size = RequiredBufferSizeForDims(dims_common); + + std::vector input_data(buffer_size); + FillRandom(&input_data); + RunOneLogSoftmaxTest(input_data.data(), dims_common, input_offset, + input_scale, stride, beta); + return true; +} + +// See TryOneUniformLogSoftmax() for a general description. +// +// Tests with "skyscraper" input patterns are included for two reasons. (a) +// Bimodal distributions are potentially challenging and perhaps more +// realistic than simple uniform random inputs. (b) Some implementations of +// LogSoftmax may adapt as they traverse the depth, and so we test handling of +// cases where relatively small values are encountered at the beginning and end. +bool TryOneSkyscraperLogSoftmax(bool small_depth) { + // We pick mostly positive values, on the whole emphasizing smaller values and + // therefore faster tests. We test a wider range of depths. In the case of + // LogSoftmax, the width and height really just create test repetitions. + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = small_depth + ? 
ExponentialRandomPositiveInt(0.75f, 40, 500) + : ExponentialRandomPositiveInt(0.75f, 175, 500); + const int input_width = ExponentialRandomPositiveInt(0.7f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.7f, 20, 200); + const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const double input_scale = std::pow(10.0, UniformRandomFloat(-2.0, 1.0)); + const int32 input_offset = UniformRandomInt(-256, 0); + static constexpr float beta = 1.0f; + // Extra parameters for skyscraper input patterns. + const double middle_proportion = + ExponentialRandomPositiveFloat(0.65f, 0.1, 1.0); + const int middle_min = UniformRandomInt(0, 255); + const int sides_max = UniformRandomInt(0, middle_min); + + Dims<4> dims_common = + MakeDimsForInference(input_depth, input_width, input_height, batch); + const int buffer_size = RequiredBufferSizeForDims(dims_common); + + std::vector input_data(buffer_size); + FillRandomSkyscraper(&input_data, input_depth, middle_proportion, middle_min, + sides_max); + RunOneLogSoftmaxTest(input_data.data(), dims_common, input_offset, + input_scale, stride, beta); + return true; +} + +TEST(TestQuantizedLogSoftmax, UniformLogSoftmaxTests) { + const int kTestsToRun = 1000; + for (int i = 0; i < kTestsToRun; i++) { + while (!TryOneUniformLogSoftmax()) { + } + } +} + +TEST(TestQuantizedLogSoftmax, SkyscraperLogSoftmaxTests) { + const int kTestsToRun = 1000; + for (int i = 0; i < kTestsToRun; i++) { + while (!TryOneSkyscraperLogSoftmax(false)) { + } + } +} + +TEST(TestQuantizedLogSoftmax, SmallSkyscraperLogSoftmaxTests) { + const int kTestsToRun = 1000; + for (int i = 0; i < kTestsToRun; i++) { + while (!TryOneSkyscraperLogSoftmax(true)) { + } + } +} +} // namespace +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/resize_bilinear_float_test.cc b/tensorflow/contrib/lite/kernels/internal/resize_bilinear_float_test.cc new file mode 100644 index 0000000..c1c50df --- /dev/null +++ 
b/tensorflow/contrib/lite/kernels/internal/resize_bilinear_float_test.cc @@ -0,0 +1,102 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include + +#include +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/test_util.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { +namespace { +void TestOneResizeBilinear(int batch, int depth, int input_width, + int input_height, int output_width, + int output_height) { + Dims<4> input_dims_inference = + MakeDimsForInference(depth, input_width, input_height, batch); + Dims<4> output_dims_inference = + MakeDimsForInference(depth, output_width, output_height, batch); + + const int input_buffer_size = RequiredBufferSizeForDims(input_dims_inference); + const int output_buffer_size = + RequiredBufferSizeForDims(output_dims_inference); + + std::vector input_data(input_buffer_size, 0); + std::vector reference_output_data(output_buffer_size, 0); + // Initialize the output data with something other than zero, so we can catch + // issue with kernels failing to initialize the output. 
+ std::vector output_data(output_buffer_size, 3.1415); + + const float input_amplitude = 1.f; + FillRandom(&input_data, -input_amplitude, input_amplitude); + + Dims<4> output_size_dims = MakeDimsForInference(2, 1, 1, 1); + std::vector output_size_data = {output_height, output_width}; + + reference_ops::ResizeBilinear( + input_data.data(), input_dims_inference, output_size_data.data(), + output_size_dims, reference_output_data.data(), output_dims_inference); + optimized_ops::ResizeBilinear(input_data.data(), input_dims_inference, + output_size_data.data(), output_size_dims, + output_data.data(), output_dims_inference); + + double sum_diff = 0; + float max_abs_val = 0; + for (int i = 0; i < output_buffer_size; i++) { + sum_diff += std::abs(output_data[i] - reference_output_data[i]); + max_abs_val = std::max(max_abs_val, std::abs(reference_output_data[i])); + } + + if (sum_diff != 0.f) { + const float mean_diff = static_cast(sum_diff / output_buffer_size); + const float relative_error = std::abs(mean_diff) / max_abs_val; + ASSERT_LT(relative_error, 1e-5f); + } +} + +TEST(ResizeBilinear, TestResizeBilinear) { + const int kTestsToRun = 100 * 1000; + for (int i = 0; i < kTestsToRun; i++) { + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int depth = ExponentialRandomPositiveInt(0.9f, 6, 50); + const int input_width = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int output_width = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int output_height = ExponentialRandomPositiveInt(0.9f, 20, 200); + + TestOneResizeBilinear(batch, depth, input_width, input_height, output_width, + output_height); + } +} + +TEST(ResizeBilinear2x2, TestResizeBilinear) { + const int kTestsToRun = 100 * 1000; + for (int i = 0; i < kTestsToRun; i++) { + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int depth = ExponentialRandomPositiveInt(0.9f, 6, 50); + const int 
input_width = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200); + const int output_width = input_width * 2; + const int output_height = input_height * 2; + + TestOneResizeBilinear(batch, depth, input_width, input_height, output_width, + output_height); + } +} +} // namespace +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc new file mode 100644 index 0000000..d781a7b --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc @@ -0,0 +1,227 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/test_util.h" + +namespace tflite { +namespace { + +void RunSoftmaxFloatReference(const uint8* input_data, + const Dims<4>& dims_common, int32 input_offset, + const double input_scale, int stride, float beta, + uint8* reference_output_data) { + const int ref_buffer_size = RequiredBufferSizeForDims(dims_common); + std::vector reference_dequant_data(ref_buffer_size); + std::vector reference_output_float_data(ref_buffer_size); + + // Reference data generated via Dequant of input into float, and then applying + // float Softmax. + reference_ops::Dequantize(input_data, dims_common, input_offset, input_scale, + reference_dequant_data.data(), dims_common); + optimized_ops::Softmax(reference_dequant_data.data(), dims_common, beta, + reference_output_float_data.data(), dims_common); + // Work with quantized scaling for Softmax, under which 256 represents 1, but + // we limit this to 255. + for (int i = 0; i < ref_buffer_size; i++) { + reference_output_data[i] = std::min( + 255, + static_cast(std::round(256.0f * reference_output_float_data[i]))); + } +} + +// Compares |test_output| with |reference_output| element by element over the +// buffer described by |dims_common|. With |be_exacting| every difference must +// be zero. Otherwise differences of at most one are tolerated, the median +// difference must be zero, and, for buffers of at least 10000 elements, the +// mean and mean-absolute differences are bounded. Failures are reported with +// |check_label| and the computed statistics. +void CheckOutputData(const uint8* test_output, const uint8* reference_output, + const Dims<4>& dims_common, const string& check_label, + bool be_exacting) { + const int buffer_size = RequiredBufferSizeForDims(dims_common); + // While calculating some metrics in floating point, we work with quantized + // scaling. 
+ std::vector diff(buffer_size); + int64_t sum_diff = 0; + int64_t sum_abs_diff = 0; + for (int i = 0; i < buffer_size; i++) { + diff[i] = static_cast(test_output[i]) - reference_output[i]; + sum_diff += diff[i]; + sum_abs_diff += std::abs(diff[i]); + } + // These stats help understand test failures. + std::sort(std::begin(diff), std::end(diff)); + const int min_diff = diff.front(); + const int max_diff = diff.back(); + const int median_diff = diff[diff.size() / 2]; + const float mean_diff = static_cast(sum_diff) / buffer_size; + const float mean_abs_diff = static_cast(sum_abs_diff) / buffer_size; + // We either check for bit exactness (against the reference quantized version) + // or for general accuracy, allowing off-by-one (against the float reference). + if (be_exacting) { + ASSERT_TRUE(std::abs(min_diff) == 0 && std::abs(max_diff) == 0) + << check_label << ": " + << "std::abs(min_diff)=" << std::abs(min_diff) + << ", std::abs(max_diff)=" << std::abs(max_diff); + } else { + // For small numbers of samples, the estimates of the means vary more. + // Rather than widen the tolerances, we skip the smaller tests. + ASSERT_TRUE(((std::abs(mean_diff) < 2e-2f && mean_abs_diff < 3e-2f) || + buffer_size < 10000) && + std::abs(median_diff) == 0 && std::abs(min_diff) <= 1 && + std::abs(max_diff) <= 1) + << check_label << ": " + << "buffer_size=" << buffer_size << ", mean_diff=" << mean_diff + << ", mean_abs_diff=" << mean_abs_diff + << ", median_diff=" << median_diff << ", min_diff=" << min_diff + << ", max_diff=" << max_diff; + } +} + +// Runs the Softmax and compares against the float reference implementation and +// the quantized reference implementation. 
+void RunOneSoftmaxTest(const uint8* input_data, const Dims<4>& dims_common, + int32 input_offset, const double input_scale, int stride, + float beta) { + const int buffer_size = RequiredBufferSizeForDims(dims_common); + std::vector optimized_softmax_output(buffer_size); + std::vector reference_float_softmax_output(buffer_size); + std::vector reference_quant_softmax_output(buffer_size); + + RunSoftmaxFloatReference(input_data, dims_common, input_offset, input_scale, + stride, beta, reference_float_softmax_output.data()); + + int32 input_beta_multiplier; + int input_beta_left_shift; + static const int kScaledDiffIntegerBits = 5; + tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits, + &input_beta_multiplier, + &input_beta_left_shift); + // diff_min has a negative value, and is used to limit the maximum magnitude + // of the diffs, which are <= 0. + const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits, + input_beta_left_shift); + + optimized_ops::Softmax(input_data, dims_common, input_beta_multiplier, + input_beta_left_shift, diff_min, + optimized_softmax_output.data(), dims_common); + reference_ops::Softmax(input_data, dims_common, input_beta_multiplier, + input_beta_left_shift, diff_min, + reference_quant_softmax_output.data(), dims_common); + + CheckOutputData(optimized_softmax_output.data(), + reference_float_softmax_output.data(), dims_common, + "Optimized vs float reference", false); + CheckOutputData(optimized_softmax_output.data(), + reference_quant_softmax_output.data(), dims_common, + "Optimized vs quant reference", true); + CheckOutputData(reference_quant_softmax_output.data(), + reference_float_softmax_output.data(), dims_common, + "Quant reference vs float reference", false); +} + +// This function picks some random Softmax params, which are checked for +// desirability. If not acceptable, it returns false. If they're OK, +// it runs the Softmax test and returns true. 
This allows the caller +// to loop until a test has been run. +// +// Currently we do not reject for any reason. +bool TryOneUniformSoftmax() { + // We pick mostly positive values, on the whole emphasizing smaller values and + // therefore faster tests. We test a wider range of depths. In the case of + // Softmax, the width and height really just create test repetitions. + const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = ExponentialRandomPositiveInt(0.75f, 175, 500); + const int input_width = ExponentialRandomPositiveInt(0.8f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.8f, 20, 200); + const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const double input_scale = std::pow(10.0, UniformRandomFloat(-2.0, 1.0)); + const int32 input_offset = UniformRandomInt(-256, 0); + const float beta = 1.0f + ExponentialRandomPositiveFloat(0.9f, 2, 10); + + Dims<4> dims_common = + MakeDimsForInference(input_depth, input_width, input_height, batch); + const int buffer_size = RequiredBufferSizeForDims(dims_common); + + std::vector input_data(buffer_size); + FillRandom(&input_data); + RunOneSoftmaxTest(input_data.data(), dims_common, input_offset, input_scale, + stride, beta); + return true; +} + +// See TryOneUniformSoftmax() for a general description. +// +// Tests with "skyscraper" input patterns are included for two reasons. (a) +// Bimodal distributions are potentially challenging and perhaps more +// realistic than simple uniform random inputs. (b) Some implementations of +// Softmax may adapt as they traverse the depth, and so we test handling of +// cases where relatively small values are encountered at the beginning and end. +bool TryOneSkyscraperSoftmax(bool small_depth) { + // We pick mostly positive values, on the whole emphasizing smaller values and + // therefore faster tests. We test a wider range of depths. In the case of + // Softmax, the width and height really just create test repetitions. 
+ const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20); + const int input_depth = small_depth + ? ExponentialRandomPositiveInt(0.75f, 40, 500) + : ExponentialRandomPositiveInt(0.75f, 175, 500); + const int input_width = ExponentialRandomPositiveInt(0.7f, 20, 200); + const int input_height = ExponentialRandomPositiveInt(0.7f, 20, 200); + const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8); + const double input_scale = std::pow(10.0, UniformRandomFloat(-2.0, 1.0)); + const int32 input_offset = UniformRandomInt(-256, 0); + const float beta = 1.0f + ExponentialRandomPositiveFloat(0.9f, 2, 10); + // Extra parameters for skyscraper input patterns. + const double middle_proportion = + ExponentialRandomPositiveFloat(0.65f, 0.1, 1.0); + const int middle_min = UniformRandomInt(0, 255); + const int sides_max = UniformRandomInt(0, middle_min); + + Dims<4> dims_common = + MakeDimsForInference(input_depth, input_width, input_height, batch); + const int buffer_size = RequiredBufferSizeForDims(dims_common); + + std::vector input_data(buffer_size); + FillRandomSkyscraper(&input_data, input_depth, middle_proportion, middle_min, + sides_max); + RunOneSoftmaxTest(input_data.data(), dims_common, input_offset, input_scale, + stride, beta); + return true; +} + +TEST(TestQuantizedSoftmax, UniformSoftmaxTests) { + const int kTestsToRun = 1000; + for (int i = 0; i < kTestsToRun; i++) { + while (!TryOneUniformSoftmax()) { + } + } +} + +TEST(TestQuantizedSoftmax, SkyscraperSoftmaxTests) { + const int kTestsToRun = 1000; + for (int i = 0; i < kTestsToRun; i++) { + while (!TryOneSkyscraperSoftmax(false)) { + } + } +} + +TEST(TestQuantizedSoftmax, SmallSkyscraperSoftmaxTests) { + const int kTestsToRun = 1000; + for (int i = 0; i < kTestsToRun; i++) { + while (!TryOneSkyscraperSoftmax(true)) { + } + } +} +} // namespace +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/test_util.cc b/tensorflow/contrib/lite/kernels/internal/test_util.cc new file mode 
100644 index 0000000..9b1fd9b --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/test_util.cc @@ -0,0 +1,121 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/internal/test_util.h" + +#include +#include + +namespace tflite { + +Dims<4> MakeDimsForInference(int depth, int width, int height, int batch) { + Dims<4> result; + int cum_prod = 1; + + result.sizes[0] = depth; + result.strides[0] = cum_prod; + cum_prod *= result.sizes[0]; + + result.sizes[1] = width; + result.strides[1] = cum_prod; + cum_prod *= result.sizes[1]; + + result.sizes[2] = height; + result.strides[2] = cum_prod; + cum_prod *= result.sizes[2]; + + result.sizes[3] = batch; + result.strides[3] = cum_prod; + + return result; +} + +// this is a copied from an internal function in propagate_fixed_sizes.cc +bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width, + int filter_height, int stride, PaddingType padding_type, + Dims<4>* output_dims, int* pad_width, int* pad_height) { + const int input_width = ArraySize(input_dims, 1); + const int input_height = ArraySize(input_dims, 2); + const int batch = ArraySize(input_dims, 3); + + int output_height = 0; + int output_width = 0; + if (padding_type == PaddingType::kValid) { + output_height = (input_height + stride - filter_height) / stride; + output_width = 
(input_width + stride - filter_width) / stride; + } else if (padding_type == PaddingType::kSame) { + output_height = (input_height + stride - 1) / stride; + output_width = (input_width + stride - 1) / stride; + } else { + return false; + } + + if (output_width <= 0 || output_height <= 0) { + return false; + } + + *pad_height = + ((output_height - 1) * stride + filter_height - input_height) / 2; + *pad_width = ((output_width - 1) * stride + filter_width - input_width) / 2; + *output_dims = + MakeDimsForInference(output_depth, output_width, output_height, batch); + return true; +} + +std::mt19937& RandomEngine() { + static std::mt19937 engine; + return engine; +} + +int UniformRandomInt(int min, int max) { + std::uniform_int_distribution dist(min, max); + return dist(RandomEngine()); +} + +float UniformRandomFloat(float min, float max) { + std::uniform_real_distribution dist(min, max); + return dist(RandomEngine()); +} + +int ExponentialRandomPositiveInt(float percentile, int percentile_val, + int max_val) { + const float lambda = + -std::log(1.f - percentile) / static_cast(percentile_val); + std::exponential_distribution dist(lambda); + float val; + do { + val = dist(RandomEngine()); + } while (!val || !std::isfinite(val) || val > max_val); + return static_cast(std::ceil(val)); +} + +float ExponentialRandomPositiveFloat(float percentile, float percentile_val, + float max_val) { + const float lambda = + -std::log(1.f - percentile) / static_cast(percentile_val); + std::exponential_distribution dist(lambda); + float val; + do { + val = dist(RandomEngine()); + } while (!std::isfinite(val) || val > max_val); + return val; +} + +// Overwrites every element of |*vec| with a value drawn from a uniform real +// distribution over |min|..|max|, using the shared RandomEngine(). +void FillRandom(std::vector* vec, float min, float max) { + std::uniform_real_distribution dist(min, max); + // Draw directly from the shared engine so that its state advances. Passing + // RandomEngine() to std::bind would store a copy of the engine, and repeated + // fills would then replay the same sequence. + auto gen = [&dist]() { return dist(RandomEngine()); }; + std::generate(std::begin(*vec), std::end(*vec), gen); +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/test_util.h
b/tensorflow/contrib/lite/kernels/internal/test_util.h new file mode 100644 index 0000000..26078ce --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/test_util.h @@ -0,0 +1,104 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TEST_UTIL_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TEST_UTIL_H_ + +#include +#include +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { + +// Creates a Dims struct from a set of dimensions. +Dims<4> MakeDimsForInference(int depth, int width, int height, int batch); + +// Computes output and padding dimensions. +bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width, + int filter_height, int stride, PaddingType padding_type, + Dims<4>* output_dims, int* pad_width, int* pad_height); + +// Returns a mt19937 random engine. +std::mt19937& RandomEngine(); + +// Returns a random integer uniformly distributed between |min| and |max|. +int UniformRandomInt(int min, int max); + +// Returns a random float uniformly distributed between |min| and |max|. +float UniformRandomFloat(float min, float max); + +// Returns a random element in |v|. 
+template +const T& RandomElement(const std::vector& v) { + return v[UniformRandomInt(0, v.size() - 1)]; +} + +// Returns a random exponentially distributed integer. +int ExponentialRandomPositiveInt(float percentile, int percentile_val, + int max_val); + +// Returns a random exponentially distributed float. +float ExponentialRandomPositiveFloat(float percentile, float percentile_val, + float max_val); + +// Fills a vector with random floats between |min| and |max|. +void FillRandom(std::vector* vec, float min, float max); + +// Fills a vector with random numbers between |min| and |max|. +template +void FillRandom(std::vector* vec, T min, T max) { + std::uniform_int_distribution dist(min, max); + auto gen = std::bind(dist, RandomEngine()); + std::generate(std::begin(*vec), std::end(*vec), gen); +} + +// Fills a vector with random numbers. +template +void FillRandom(std::vector* vec) { + FillRandom(vec, std::numeric_limits::min(), std::numeric_limits::max()); +} + +template +void FillRandom(typename std::vector::iterator begin_it, + typename std::vector::iterator end_it, T min, T max) { + std::uniform_int_distribution dist(min, max); + auto gen = std::bind(dist, RandomEngine()); + std::generate(begin_it, end_it, gen); +} + +// Fill with a "skyscraper" pattern, in which there is a central section (across +// the depth) with higher values than the surround. 
+template +void FillRandomSkyscraper(std::vector* vec, int depth, + double middle_proportion, uint8 middle_min, + uint8 sides_max) { + for (auto base_it = std::begin(*vec); base_it != std::end(*vec); + base_it += depth) { + auto left_it = base_it + std::ceil(0.5 * depth * (1.0 - middle_proportion)); + auto right_it = + base_it + std::ceil(0.5 * depth * (1.0 + middle_proportion)); + FillRandom(base_it, left_it, std::numeric_limits::min(), sides_max); + FillRandom(left_it, right_it, middle_min, std::numeric_limits::max()); + FillRandom(right_it, base_it + depth, std::numeric_limits::min(), + sides_max); + } +} + +} // namespace tflite +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TEST_UTIL_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index 43c6883..d5293ed 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -20,6 +20,7 @@ limitations under the License. namespace tflite { enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu }; +enum class PaddingType { kNone, kSame, kValid }; // Quantization parameters, determining the mapping of quantized values // to real values (i.e. determining how quantized values are mathematically -- 2.7.4