From 9f332ea94b21aff6a73089db4d8e147748fb6ff6 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 23 May 2018 22:33:53 -0700
Subject: [PATCH] Add unit tests to tflite kernels

PiperOrigin-RevId: 197842122
---
 tensorflow/contrib/lite/kernels/internal/BUILD     |  86 ++++++
 .../kernels/internal/depthwiseconv_float_test.cc   | 162 ++++++++++
 .../internal/depthwiseconv_quantized_test.cc       | 330 ++++++++++++++++++++
 .../lite/kernels/internal/log_quantized_test.cc    | 333 +++++++++++++++++++++
 .../kernels/internal/logsoftmax_quantized_test.cc  | 241 +++++++++++++++
 .../kernels/internal/resize_bilinear_float_test.cc | 102 +++++++
 .../kernels/internal/softmax_quantized_test.cc     | 227 ++++++++++++++
 .../contrib/lite/kernels/internal/test_util.cc     | 121 ++++++++
 .../contrib/lite/kernels/internal/test_util.h      | 104 +++++++
 tensorflow/contrib/lite/kernels/internal/types.h   |   1 +
 10 files changed, 1707 insertions(+)
 create mode 100644 tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc
 create mode 100644 tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc
 create mode 100644 tensorflow/contrib/lite/kernels/internal/log_quantized_test.cc
 create mode 100644 tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc
 create mode 100644 tensorflow/contrib/lite/kernels/internal/resize_bilinear_float_test.cc
 create mode 100644 tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc
 create mode 100644 tensorflow/contrib/lite/kernels/internal/test_util.cc
 create mode 100644 tensorflow/contrib/lite/kernels/internal/test_util.h

diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD
index aabbb06..0a5223b 100644
--- a/tensorflow/contrib/lite/kernels/internal/BUILD
+++ b/tensorflow/contrib/lite/kernels/internal/BUILD
@@ -420,6 +420,15 @@ cc_library(
     }),
 )
 
+cc_library(
+    name = "test_util",
+    srcs = ["test_util.cc"],
+    hdrs = ["test_util.h"],
+    deps = [
+        ":types",
+    ],
+)
+
 cc_test(
     name = "tensor_utils_test",
     srcs = ["tensor_utils_test.cc"],
@@ -440,6 +449,83 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "depthwiseconv_float_test",
+    srcs = ["depthwiseconv_float_test.cc"],
+    deps = [
+        ":optimized_base",
+        ":reference_base",
+        ":test_util",
+        ":types",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "depthwiseconv_quantized_test",
+    srcs = ["depthwiseconv_quantized_test.cc"],
+    deps = [
+        ":optimized_base",
+        ":reference_base",
+        ":test_util",
+        ":types",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "resize_bilinear_float_test",
+    srcs = ["resize_bilinear_float_test.cc"],
+    deps = [
+        ":optimized_base",
+        ":reference_base",
+        ":test_util",
+        ":types",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "softmax_quantized_test",
+    timeout = "long",
+    srcs = [
+        "softmax_quantized_test.cc",
+    ],
+    deps = [
+        ":optimized_base",
+        ":quantization_util",
+        ":reference_base",
+        ":test_util",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "logsoftmax_quantized_test",
+    timeout = "long",
+    srcs = [
+        "logsoftmax_quantized_test.cc",
+    ],
+    tags = ["tflite_not_portable"],
+    deps = [
+        ":optimized_base",
+        ":quantization_util",
+        ":reference_base",
+        ":test_util",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
+cc_test(
+    name = "log_quantized_test",
+    srcs = ["log_quantized_test.cc"],
+    deps = [
+        ":optimized_base",
+        ":reference_base",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
+
 cc_library(
     name = "cpu_check",
     hdrs = [
diff --git a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc
new file mode 100644
index 0000000..844ee6a
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_float_test.cc
@@ -0,0 +1,162 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/kernels/internal/test_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/types.h"
+
+#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
+#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h"
+
+namespace tflite {
+namespace {
+
+// Runs the optimized DepthwiseConv and checks it against the reference kernel.
+template <FusedActivationFunctionType Ac>
+void TestOneDepthwiseConv(const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          const float* bias_data, const Dims<4>& bias_dims,
+                          int stride, int pad_width, int pad_height,
+                          int depth_multiplier, const Dims<4>& output_dims) {
+  const int output_buffer_size = RequiredBufferSizeForDims(output_dims);
+  std::vector<float> output_data(output_buffer_size);
+  std::vector<float> reference_output_data(output_buffer_size);
+  reference_ops::DepthwiseConv<Ac>(input_data, input_dims, filter_data,
+                                   filter_dims, bias_data, bias_dims, stride,
+                                   pad_width, pad_height, depth_multiplier,
+                                   reference_output_data.data(), output_dims);
+  optimized_ops::DepthwiseConv<Ac>(input_data, input_dims, filter_data,
+                                   filter_dims, bias_data, bias_dims, stride,
+                                   pad_width, pad_height, depth_multiplier,
+                                   output_data.data(), output_dims);
+  double sum_abs_diff = 0;  // Total |optimized - reference| over all outputs.
+  float max_abs_val = 0;    // Largest |reference| value seen.
+  for (int i = 0; i < output_buffer_size; i++) {
+    sum_abs_diff += std::abs(output_data[i] - reference_output_data[i]);
+    max_abs_val = std::max(max_abs_val, std::abs(reference_output_data[i]));
+  }
+  if (sum_abs_diff != 0.f) {  // Outputs differ: bound mean |diff| / max |ref|.
+    const float mean_diff =
+        static_cast<float>(sum_abs_diff / output_buffer_size);
+    const float relative_error = std::abs(mean_diff) / max_abs_val;
+    ASSERT_LT(relative_error, 1e-5f);  // inf/NaN (max_abs_val == 0) also fails.
+  }
+}
+
+void TestOneDepthwiseConv(FusedActivationFunctionType Ac,  // Runtime value.
+                          const float* input_data, const Dims<4>& input_dims,
+                          const float* filter_data, const Dims<4>& filter_dims,
+                          const float* bias_data, const Dims<4>& bias_dims,
+                          int stride, int pad_width, int pad_height,
+                          int depth_multiplier, const Dims<4>& output_dims) {
+#define TOCO_HANDLE_CASE(AC_TYPE)                                            \
+  if (AC_TYPE == Ac) {                                                       \
+    TestOneDepthwiseConv<AC_TYPE>(input_data, input_dims, filter_data,       \
+                                  filter_dims, bias_data, bias_dims, stride, \
+                                  pad_width, pad_height, depth_multiplier,   \
+                                  output_dims);                              \
+    return;                                                                  \
+  }
+  TOCO_HANDLE_CASE(FusedActivationFunctionType::kNone)  // Each: if + return.
+  TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu)
+  TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu1)
+  TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu6)
+#undef TOCO_HANDLE_CASE  // No case matched: returns without running a test.
+}
+
+// This function picks some random DepthwiseConv params, which may or may not
+// be legal. If they're not legal, it returns false. If they're legal,
+// it runs the DepthwiseConv test and returns true. This allows the caller
+// to loop until a test has been run.
+bool TryTestOneDepthwiseConv() {
+  // We have to pick a lot of positive values, where we are particularly
+  // interested in small values because they are most likely to be special
+  // cases in optimized implementations, and secondarily because they allow
+  // tests to run fast, which means we can run more tests and get more
+  // coverage.
+  const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20);
+  const int input_depth = ExponentialRandomPositiveInt(0.9f, 6, 50);
+  const int input_width = ExponentialRandomPositiveInt(0.9f, 20, 200);
+  const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200);
+  const int filter_width = ExponentialRandomPositiveInt(0.9f, 4, 10);
+  const int filter_height = ExponentialRandomPositiveInt(0.9f, 4, 10);
+  const int depth_multiplier = ExponentialRandomPositiveInt(0.8f, 6, 50);
+  const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8);
+  const int output_depth = input_depth * depth_multiplier;
+  // The optimized DepthwiseConv implementation currently uses a fixed-size
+  // accumulator buffer on the stack, sized as given below. This means
+  // that it does not support larger output depths. It CHECK's for it,
+  // so it's safe in the sense that if a larger output depth was encountered,
+  // it would explicitly fail. We just need to adjust our testing to that
+  // constraint.
+  const int kMaxSupportedOutputDepth = 1024;
+  if (output_depth > kMaxSupportedOutputDepth) {
+    return false;  // Rejected draw; the caller retries.
+  }
+  const auto ac = RandomElement(std::vector<FusedActivationFunctionType>(
+      {FusedActivationFunctionType::kNone, FusedActivationFunctionType::kRelu,
+       FusedActivationFunctionType::kRelu6,
+       FusedActivationFunctionType::kRelu1}));
+  Dims<4> input_dims_inference =
+      MakeDimsForInference(input_depth, input_width, input_height, batch);
+  Dims<4> output_dims_inference;
+  int pad_width, pad_height;  // Filled in by ComputeConvSizes below.
+  const auto padding_type =
+      UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid;
+  if (!ComputeConvSizes(input_dims_inference, output_depth, filter_width,
+                        filter_height, stride, padding_type,
+                        &output_dims_inference, &pad_width, &pad_height)) {
+    return false;  // Rejected draw; the caller retries.
+  }
+  Dims<4> filter_dims_inference =
+      MakeDimsForInference(output_depth, filter_width, filter_height, 1);
+  Dims<4> bias_dims_inference = MakeDimsForInference(output_depth, 1, 1, 1);
+  const int input_buffer_size = RequiredBufferSizeForDims(input_dims_inference);
+  const int filter_buffer_size =
+      RequiredBufferSizeForDims(filter_dims_inference);
+  std::vector<float> input_data(input_buffer_size);
+  std::vector<float> filter_data(filter_buffer_size);
+  std::vector<float> bias_data(output_depth);
+  const float input_amplitude = 1.f;
+  const float filter_amplitude = 1.f;
+  const float bias_amplitude =
+      filter_width * filter_height * input_amplitude * filter_amplitude;
+  FillRandom(&input_data, -input_amplitude, input_amplitude);
+  FillRandom(&filter_data, -filter_amplitude, filter_amplitude);
+  FillRandom(&bias_data, -bias_amplitude, bias_amplitude);
+  TestOneDepthwiseConv(ac, input_data.data(), input_dims_inference,
+                       filter_data.data(), filter_dims_inference,
+                       bias_data.data(), bias_dims_inference, stride, pad_width,
+                       pad_height, depth_multiplier, output_dims_inference);
+  return true;  // A comparison was actually run.
+}
+
+void TestOneDepthwiseConv() {
+  // Keep redrawing random parameters until one legal configuration was run.
+  for (bool ran = false; !ran;) ran = TryTestOneDepthwiseConv();
+}
+
+// Fuzzes the float kernel over many randomly drawn legal configurations.
+TEST(TestDepthwiseConv, TestDepthwiseConv) {
+  constexpr int kTestsToRun = 100 * 1000;
+  int remaining = kTestsToRun;
+  while (remaining-- > 0) TestOneDepthwiseConv();
+}
+}  // namespace
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc
new file mode 100644
index 0000000..2c0fc84
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/internal/depthwiseconv_quantized_test.cc
@@ -0,0 +1,330 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <sys/types.h>
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/kernels/internal/test_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/types.h"
+
+#define ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK
+#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h"
+
+namespace tflite {
+namespace {
+
+// Compares optimized vs. reference output; returns -1/0/+1 saturation skew.
+template <FusedActivationFunctionType Ac>
+int TestOneDepthwiseConvWithGivenOutputShift(
+    const std::uint8_t* input_data, const Dims<4>& input_dims,
+    std::int32_t input_offset, const std::uint8_t* filter_data,
+    const Dims<4>& filter_dims, std::int32_t filter_offset,
+    const std::int32_t* bias_data, const Dims<4>& bias_dims, int stride,
+    int pad_width, int pad_height, int depth_multiplier,
+    std::int32_t output_offset, std::int32_t output_multiplier,
+    int output_shift, std::int32_t output_activation_min,
+    std::int32_t output_activation_max, const Dims<4>& output_dims) {
+  const int output_buffer_size = RequiredBufferSizeForDims(output_dims);
+  std::vector<std::uint8_t> output_data(output_buffer_size);
+  std::vector<std::uint8_t> reference_output_data(output_buffer_size);
+  reference_ops::DepthwiseConv<Ac>(
+      input_data, input_dims, input_offset, filter_data, filter_dims,
+      filter_offset, bias_data, bias_dims, stride, pad_width, pad_height,
+      depth_multiplier, output_offset, output_multiplier, output_shift,
+      output_activation_min, output_activation_max,
+      reference_output_data.data(), output_dims);
+  optimized_ops::DepthwiseConv<Ac>(
+      input_data, input_dims, input_offset, filter_data, filter_dims,
+      filter_offset, bias_data, bias_dims, stride, pad_width, pad_height,
+      depth_multiplier, output_offset, output_multiplier, output_shift,
+      output_activation_min, output_activation_max, output_data.data(),
+      output_dims);
+  int saturated_min = 0;  // Outputs equal to output_activation_min.
+  int saturated_max = 0;  // Outputs equal to output_activation_max.
+  std::vector<int> diff(output_buffer_size);
+  std::int64_t sum_diff = 0;
+  std::int64_t sum_abs_diff = 0;
+  for (int i = 0; i < output_buffer_size; i++) {
+    diff[i] = static_cast<int>(output_data[i]) -
+              static_cast<int>(reference_output_data[i]);
+    sum_diff += diff[i];
+    sum_abs_diff += std::abs(diff[i]);
+    saturated_min += output_data[i] == output_activation_min;
+    saturated_max += output_data[i] == output_activation_max;
+  }
+  // These stats help understand test failures.
+  std::sort(std::begin(diff), std::end(diff));
+  const int min_diff = diff.front();
+  const int max_diff = diff.back();
+  const int median_diff = diff[diff.size() / 2];
+  const float mean_diff = static_cast<float>(sum_diff) / output_buffer_size;
+  const float mean_abs_diff =
+      static_cast<float>(sum_abs_diff) / output_buffer_size;
+  // Normally we should require bit-for-bit exact results. Unfortunately a bug
+  // in the Intel arm_neon_sse.h translation header that we use for x86 tests
+  // causes 1-bit inaccuracy in the vqrdmulh_n_s32 intrinsic, which causes
+  // off-by-1 errors in quantized DepthwiseConv ops. So we have to live with a
+  // few off-by-one errors for now, yet still ensure that no more than a small
+  // minority of values are wrong.
+  EXPECT_TRUE(std::abs(mean_diff) < 1e-5f && mean_abs_diff < 1e-5f &&
+              std::abs(median_diff) == 0 && std::abs(min_diff) <= 1 &&
+              std::abs(max_diff) <= 1);  // Non-fatal: execution continues.
+  if (saturated_min > 2 * saturated_max) {  // Skewed towards the minimum.
+    return -1;
+  }
+  if (saturated_max > 2 * saturated_min) {  // Skewed towards the maximum.
+    return 1;
+  }
+  return 0;
+}
+
+// The point of this function is that we can't practically know which
+// output_shift value to pass to test DepthwiseConv. It's not easy to guess (we
+// could do some statistics for large size, but they would be fragile at
+// smaller sizes), and guessing wrong would mean that all the values get
+// saturated so the test becomes vacuous. So we just bisect our way to
+// reasonable output_shift values. Note: the output_activation_bisect_start/end
+// parameters bound the output_shift search range, despite their names.
+template <FusedActivationFunctionType Ac>
+void TestOneDepthwiseConvBisectOutputShift(
+    const std::uint8_t* input_data, const Dims<4>& input_dims,
+    std::int32_t input_offset, const std::uint8_t* filter_data,
+    const Dims<4>& filter_dims, std::int32_t filter_offset,
+    const std::int32_t* bias_data, const Dims<4>& bias_dims, int stride,
+    int pad_width, int pad_height, int depth_multiplier,
+    std::int32_t output_offset, std::int32_t output_multiplier,
+    int output_activation_bisect_start, int output_activation_bisect_end,
+    std::int32_t output_activation_min, std::int32_t output_activation_max,
+    const Dims<4>& output_dims) {
+  ASSERT_LT(output_activation_bisect_start, output_activation_bisect_end)
+      << "Bisection failed ?!?!";
+  int output_shift_bisect_midpoint =
+      (output_activation_bisect_start + output_activation_bisect_end) / 2;
+  int bisect_result = TestOneDepthwiseConvWithGivenOutputShift<Ac>(
+      input_data, input_dims, input_offset, filter_data, filter_dims,
+      filter_offset, bias_data, bias_dims, stride, pad_width, pad_height,
+      depth_multiplier, output_offset, output_multiplier,
+      output_shift_bisect_midpoint, output_activation_min,
+      output_activation_max, output_dims);
+  // Any mismatch was reported above through the non-fatal EXPECT_TRUE, so we
+  // get here either way; bisect_result only describes output saturation.
+  if (bisect_result == 0) {
+    // The result isn't particularly saturated on one or the other side.
+    // All good, we're done.
+    return;
+  }
+  if (output_activation_bisect_start == output_activation_bisect_end - 1) {
+    // There is still some saturation on one side, but the bisection is
+    // finished anyways. We're done; nothing more we can do about it. This
+    // happens in particular when using an activation with a narrow
+    // range.
+    return;
+  }
+  // Continue the bisection: max-side saturation (1) moves to larger shifts.
+  int new_output_activation_bisect_start = bisect_result == 1
+                                               ? output_shift_bisect_midpoint
+                                               : output_activation_bisect_start;
+  int new_output_activation_bisect_end = bisect_result == 1
+                                             ? output_activation_bisect_end
+                                             : output_shift_bisect_midpoint;
+  TestOneDepthwiseConvBisectOutputShift<Ac>(
+      input_data, input_dims, input_offset, filter_data, filter_dims,
+      filter_offset, bias_data, bias_dims, stride, pad_width, pad_height,
+      depth_multiplier, output_offset, output_multiplier,
+      new_output_activation_bisect_start, new_output_activation_bisect_end,
+      output_activation_min, output_activation_max, output_dims);
+}
+
+template <FusedActivationFunctionType Ac>  // Tries output_shift in [0, 32).
+void TestOneDepthwiseConv(
+    const std::uint8_t* input_data, const Dims<4>& input_dims,
+    std::int32_t input_offset, const std::uint8_t* filter_data,
+    const Dims<4>& filter_dims, std::int32_t filter_offset,
+    const std::int32_t* bias_data, const Dims<4>& bias_dims, int stride,
+    int pad_width, int pad_height, int depth_multiplier,
+    std::int32_t output_offset, std::int32_t output_multiplier,
+    std::int32_t output_activation_min, std::int32_t output_activation_max,
+    const Dims<4>& output_dims) {
+  TestOneDepthwiseConvBisectOutputShift<Ac>(  // 0 and 32 bound the bisection.
+      input_data, input_dims, input_offset, filter_data, filter_dims,
+      filter_offset, bias_data, bias_dims, stride, pad_width, pad_height,
+      depth_multiplier, output_offset, output_multiplier, 0, 32,
+      output_activation_min, output_activation_max, output_dims);
+}
+
+void TestOneDepthwiseConv(  // Maps the runtime Ac onto the <Ac> template.
+    FusedActivationFunctionType Ac, const std::uint8_t* input_data,
+    const Dims<4>& input_dims, std::int32_t input_offset,
+    const std::uint8_t* filter_data, const Dims<4>& filter_dims,
+    std::int32_t filter_offset, const std::int32_t* bias_data,
+    const Dims<4>& bias_dims, int stride, int pad_width, int pad_height,
+    int depth_multiplier, std::int32_t output_offset,
+    std::int32_t output_multiplier, std::int32_t output_activation_min,
+    std::int32_t output_activation_max, const Dims<4>& output_dims) {
+#define TOCO_HANDLE_CASE(AC_TYPE)                                           \
+  if (AC_TYPE == Ac) {                                                      \
+    TestOneDepthwiseConv<AC_TYPE>(                                          \
+        input_data, input_dims, input_offset, filter_data, filter_dims,     \
+        filter_offset, bias_data, bias_dims, stride, pad_width, pad_height, \
+        depth_multiplier, output_offset, output_multiplier,                 \
+        output_activation_min, output_activation_max, output_dims);         \
+    return;                                                                 \
+  }
+  TOCO_HANDLE_CASE(FusedActivationFunctionType::kNone)  // Each: if + return.
+  TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu)
+  TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu1)
+  TOCO_HANDLE_CASE(FusedActivationFunctionType::kRelu6)
+#undef TOCO_HANDLE_CASE  // No case matched: returns without running a test.
+}
+
+bool TryTestDepthwiseConv(int batch, int input_depth, int input_width,
+                          int input_height, int filter_width, int filter_height,
+                          int depth_multiplier, int stride,
+                          PaddingType padding_type) {
+  const int output_depth = input_depth * depth_multiplier;
+  // The optimized DepthwiseConv implementation currently uses a fixed-size
+  // accumulator buffer on the stack, sized as given below. This means
+  // that it does not support larger output depths. It CHECK's for it,
+  // so it's safe in the sense that if a larger output depth was encountered,
+  // it would explicitly fail. We just need to adjust our testing to that
+  // constraint.
+  const int kMaxSupportedOutputDepth = 1024;
+  if (output_depth > kMaxSupportedOutputDepth) {
+    return false;  // Rejected parameters; no test was run.
+  }
+  const auto ac = RandomElement(std::vector<FusedActivationFunctionType>(
+      {FusedActivationFunctionType::kNone, FusedActivationFunctionType::kRelu,
+       FusedActivationFunctionType::kRelu6,
+       FusedActivationFunctionType::kRelu1}));
+  int output_activation_min = 0;    // Default clamp range is [0, 255].
+  int output_activation_max = 255;  // Narrowed below for some non-kNone draws.
+  if (ac != FusedActivationFunctionType::kNone && UniformRandomInt(0, 1)) {
+    output_activation_min = UniformRandomInt(0, 50);
+    output_activation_max = UniformRandomInt(200, 255);
+  }
+  const std::int32_t output_multiplier =
+      UniformRandomInt(1 << 29, std::numeric_limits<std::int32_t>::max());
+  const std::int32_t input_offset = UniformRandomInt(-256, 0);
+  const std::int32_t filter_offset = UniformRandomInt(-256, 0);
+  const std::int32_t output_offset = UniformRandomInt(-256, 0);
+  Dims<4> input_dims_inference =
+      MakeDimsForInference(input_depth, input_width, input_height, batch);
+  Dims<4> output_dims_inference;
+  int pad_width, pad_height;  // Filled in by ComputeConvSizes below.
+  if (!ComputeConvSizes(input_dims_inference, output_depth, filter_width,
+                        filter_height, stride, padding_type,
+                        &output_dims_inference, &pad_width, &pad_height)) {
+    return false;  // Rejected parameters; no test was run.
+  }
+  Dims<4> filter_dims_inference =
+      MakeDimsForInference(output_depth, filter_width, filter_height, 1);
+  Dims<4> bias_dims_inference = MakeDimsForInference(output_depth, 1, 1, 1);
+  const int input_buffer_size = RequiredBufferSizeForDims(input_dims_inference);
+  const int filter_buffer_size =
+      RequiredBufferSizeForDims(filter_dims_inference);
+  std::vector<std::uint8_t> input_data(input_buffer_size);
+  std::vector<std::uint8_t> filter_data(filter_buffer_size);
+  std::vector<std::int32_t> bias_data(output_depth);
+  FillRandom(&input_data);
+  FillRandom(&filter_data);
+  FillRandom(&bias_data, -10000, 10000);
+  TestOneDepthwiseConv(ac, input_data.data(), input_dims_inference,
+                       input_offset, filter_data.data(), filter_dims_inference,
+                       filter_offset, bias_data.data(), bias_dims_inference,
+                       stride, pad_width, pad_height, depth_multiplier,
+                       output_offset, output_multiplier, output_activation_min,
+                       output_activation_max, output_dims_inference);
+  return true;  // A comparison was actually run.
+}
+
+// This function picks some random DepthwiseConv params, which may or may not
+// be legal, and forwards them to TryTestDepthwiseConv. If they're not legal,
+// it returns false. If they're legal, it runs the DepthwiseConv test and
+// returns true. This allows the caller to loop until a test has been run.
+bool TryTestOneDepthwiseConv() {
+  // We have to pick a lot of positive values, where we are particularly
+  // interested in small values because they are most likely to be special
+  // cases in optimized implementations, and secondarily because they allow
+  // tests to run fast, which means we can run more tests and get more
+  // coverage.
+  const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20);
+  const int input_depth = ExponentialRandomPositiveInt(0.9f, 6, 50);
+  const int input_width = ExponentialRandomPositiveInt(0.9f, 20, 200);
+  const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200);
+  const int filter_width = ExponentialRandomPositiveInt(0.9f, 4, 10);
+  const int filter_height = ExponentialRandomPositiveInt(0.9f, 4, 10);
+  const int depth_multiplier = ExponentialRandomPositiveInt(0.8f, 6, 50);
+  const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8);
+  const auto padding_type =
+      UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid;
+
+  return TryTestDepthwiseConv(batch, input_depth, input_width, input_height,
+                              filter_width, filter_height, depth_multiplier,
+                              stride, padding_type);
+}
+
+// Like TryTestOneDepthwiseConv, but pins filter 3x3 and depth_multiplier 1.
+bool TryTestOneDepthwiseConv3x3Filter() {
+  const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20);
+  const int input_depth = 8 * ExponentialRandomPositiveInt(0.9f, 10, 50);
+  const int input_width = ExponentialRandomPositiveInt(0.9f, 20, 200);
+  const int input_height = ExponentialRandomPositiveInt(0.9f, 20, 200);
+  const int filter_width = 3;
+  const int filter_height = 3;
+  const int depth_multiplier = 1;
+  const int stride = UniformRandomInt(1, 2);
+  // Although the 3x3 kernel supports only kValid padding, we also draw kSame
+  // to test that it is routed through the correct code path.
+  const auto padding_type =
+      UniformRandomInt(0, 1) ? PaddingType::kSame : PaddingType::kValid;
+
+  return TryTestDepthwiseConv(batch, input_depth, input_width, input_height,
+                              filter_width, filter_height, depth_multiplier,
+                              stride, padding_type);
+}
+
+void TestOneDepthwiseConv() {
+  // Keep redrawing random parameters until one legal configuration was run.
+  for (bool ran = false; !ran;) ran = TryTestOneDepthwiseConv();
+}
+
+void TestOneDepthwiseConv3x3Filter() {
+  // Keep redrawing 3x3-filter parameters until one legal configuration ran.
+  for (bool ran = false; !ran;) ran = TryTestOneDepthwiseConv3x3Filter();
+}
+
+// Fuzzes the quantized kernel over randomly drawn general configurations.
+TEST(TestDepthwiseConv, TestDepthwiseConv) {
+  constexpr int kTestsToRun = 10 * 1000;
+  int remaining = kTestsToRun;
+  while (remaining-- > 0) TestOneDepthwiseConv();
+}
+
+// Fuzzes the quantized kernel over randomly drawn 3x3-filter configurations.
+TEST(TestDepthwiseConv3x3Filter, TestDepthwiseConv) {
+  constexpr int kTestsToRun = 3 * 1000;
+  int remaining = kTestsToRun;
+  while (remaining-- > 0) TestOneDepthwiseConv3x3Filter();
+}
+
+}  // namespace
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/internal/log_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/log_quantized_test.cc
new file mode 100644
index 0000000..7e9ff52
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/internal/log_quantized_test.cc
@@ -0,0 +1,333 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <random>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#define GEMMLOWP_ENABLE_FIXEDPOINT_CONSTANTS_CHECKS
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+
+namespace {
+
+class NumberGenerator {
+ public:
+  // Returns n ints in [min_val, max_val]. The span is taken in double and the
+  // draw scaled by 1/(max()+1), so it can neither overflow nor reach max_val+1.
+  std::vector<int> RandomIntVector(int n, int min_val, int max_val) {
+    const double span = static_cast<double>(max_val) + 1.0 - min_val;
+    const double scale = span / (static_cast<double>(engine_.max()) + 1.0);
+    std::vector<int> vec(n);
+    for (auto& it : vec) it = min_val + std::floor(engine_() * scale);
+    return vec;
+  }
+  std::mt19937 engine_;  // Default-seeded, so every run sees the same data.
+};
+
+// Test fixture whose only state is a NumberGenerator for drawing inputs.
+struct LogQuantizedTest : public ::testing::Test {
+  NumberGenerator generator_;
+};
+
+// Float reference for the fixed-point log.  Requires input_integer_bits <= 30
+// and output_integer_bits > 0; the result saturates to the int32 range.
+inline int32 LogPositiveValuesViaFloat(int32 input_val, int input_integer_bits,
+                                       int output_integer_bits) {
+  constexpr double kLowest = std::numeric_limits<int32>::min();
+  constexpr double kHighest = std::numeric_limits<int32>::max();
+  const int input_divisor = 1 << (30 - input_integer_bits);
+  const int output_multiplier = 1 << (31 - output_integer_bits);
+  const double real_input = input_val * 0.5 / input_divisor;
+  const double scaled_log =
+      tflite::TfLiteRound(std::log(real_input) * output_multiplier);
+  const double saturated = std::min(kHighest, std::max(kLowest, scaled_log));
+  return static_cast<std::int32_t>(saturated);
+}
+
+// Asserts each test_output[i] is within tolerance (raw fixed-point units) of
+// reference_output[i]; test_input widens the tolerance for tiny inputs.
+void CheckOutputData(const std::vector<int32>& test_output,
+                     const std::vector<int32>& reference_output,
+                     const std::vector<int32>& test_input,
+                     const string& check_label, int input_integer_bits,
+                     int output_integer_bits, int tolerance) {
+  // In the special case of small input, specifically raw value of 5, a rounding
+  // up leads to difference in the output.  We do not aim to be accurate for
+  // very small input values, and there should be sufficient input fractional
+  // bits that this is a small input.
+  static constexpr double error_from_rounding_up = 0.0224585;
+  // ldexp, not `1 << k`: k is 31 when a format has zero integer bits.
+  const double input_scale = std::ldexp(1.0, 31 - input_integer_bits);
+  const double output_scale = std::ldexp(1.0, 31 - output_integer_bits);
+  const int n = test_output.size();
+  ASSERT_EQ(n, reference_output.size());
+  ASSERT_EQ(n, test_input.size());
+  for (int i = 0; i < n; ++i) {
+    // Adjust tolerance when input <= 5*2^-(31-input_integer_bits).
+    const int adjusted_tolerance =
+        test_input[i] > 5
+            ? tolerance
+            : std::max(tolerance, static_cast<int>(std::ceil(
+                                      error_from_rounding_up * output_scale)));
+    const int abs_diff = std::abs(test_output[i] - reference_output[i]);
+    ASSERT_LE(abs_diff, adjusted_tolerance)
+        << "Failure in \"" << check_label << "\" at i=" << i
+        << ", test_input[i]=" << test_input[i] << "="
+        << test_input[i] / input_scale
+        << ", test_output[i]=" << test_output[i] << "="
+        << test_output[i] / output_scale
+        << ", reference_output[i]=" << reference_output[i] << "="
+        << reference_output[i] / output_scale
+        << ", difference[i]=" << abs_diff << "=" << abs_diff / output_scale
+        << "; tolerance=" << tolerance
+        << ", adj tolerance=" << adjusted_tolerance;
+  }
+}
+
+// Right-shifts each (*vec)[i] by shifts[i], never letting it drop below 1.
+void RightShiftVector(const std::vector<int32>& shifts,
+                      std::vector<int32>* vec) {
+  const int n = vec->size();
+  ASSERT_EQ(n, shifts.size());
+  auto shift = shifts.begin();
+  for (int32& value : *vec) value = std::max(1, value >> *shift++);
+}
+
+template <int OutputIntegerBits, int InputIntegerBits>
+void RunSingleTest(const std::vector<int32>& test_input,
+                   const string& check_label, int tolerance) {
+  const int n = test_input.size();
+  std::vector<int32> float_gen_output(n, 0);
+  std::vector<int32> reference_output(n, 0);
+  std::vector<int32> optimized_output(n, 0);
+
+  // Every raw input is clamped to at least 2 before it is evaluated.  This
+  // works around __builtin_clz(0u), which may equal 31 instead of 32.
+  std::vector<int32> fudged_input(n, 0);
+  for (int i = 0; i < n; ++i) {
+    fudged_input[i] = std::max(test_input[i], 2);
+  }
+  // Evaluate every clamped input with the reference, optimized and float paths.
+  for (int i = 0; i < n; ++i) {
+    reference_output[i] =
+        tflite::reference_ops::log_x_for_x_greater_than_or_equal_to_1_impl<
+            OutputIntegerBits, InputIntegerBits>(
+            gemmlowp::FixedPoint<int32, InputIntegerBits>::FromRaw(
+                fudged_input[i]))
+            .raw();
+    optimized_output[i] =
+        tflite::optimized_ops::log_x_for_x_greater_than_or_equal_to_1_impl<
+            OutputIntegerBits, InputIntegerBits>(
+            gemmlowp::FixedPoint<int32, InputIntegerBits>::FromRaw(
+                fudged_input[i]))
+            .raw();
+    float_gen_output[i] = LogPositiveValuesViaFloat(
+        fudged_input[i], InputIntegerBits, OutputIntegerBits);
+  }
+  // The first check passes tolerance 0: optimized must equal reference exactly.
+  {
+    std::ostringstream label;
+    label << check_label << " / optimized vs reference / InputIntegerBits="
+          << InputIntegerBits << ", OutputIntegerBits=" << OutputIntegerBits;
+    CheckOutputData(
+        optimized_output, reference_output, test_input, label.str(),
+        InputIntegerBits, OutputIntegerBits, 0);
+  }
+  {  // Reference kernel against the float-computed expectation.
+    std::ostringstream label;
+    label << check_label << " / reference vs float-gen / InputIntegerBits="
+          << InputIntegerBits << ", OutputIntegerBits=" << OutputIntegerBits;
+    CheckOutputData(
+        reference_output, float_gen_output, test_input, label.str(),
+        InputIntegerBits, OutputIntegerBits, tolerance);
+  }
+  {  // Optimized kernel against the float-computed expectation.
+    std::ostringstream label;
+    label << check_label << " optimized vs float-gen / InputIntegerBits="
+          << InputIntegerBits << ", OutputIntegerBits=" << OutputIntegerBits;
+    CheckOutputData(
+        optimized_output, float_gen_output, test_input, label.str(),
+        InputIntegerBits, OutputIntegerBits, tolerance);
+  }
+}
+
+// Routes a runtime input_integer_bits in [0, 29] to its template instantiation.
+template <int OutputIntegerBits>
+void RunSingleTest(const std::vector<int32>& test_input, int input_integer_bits,
+                   const string& check_label, int tolerance) {
+#define INPUT_CASE(K)                                                   \
+  case K:                                                               \
+    return RunSingleTest<OutputIntegerBits, K>(test_input, check_label, \
+                                               tolerance)
+  switch (input_integer_bits) {
+    INPUT_CASE(0);
+    INPUT_CASE(1);
+    INPUT_CASE(2);
+    INPUT_CASE(3);
+    INPUT_CASE(4);
+    INPUT_CASE(5);
+    INPUT_CASE(6);
+    INPUT_CASE(7);
+    INPUT_CASE(8);
+    INPUT_CASE(9);
+    INPUT_CASE(10);
+    INPUT_CASE(11);
+    INPUT_CASE(12);
+    INPUT_CASE(13);
+    INPUT_CASE(14);
+    INPUT_CASE(15);
+    INPUT_CASE(16);
+    INPUT_CASE(17);
+    INPUT_CASE(18);
+    INPUT_CASE(19);
+    INPUT_CASE(20);
+    INPUT_CASE(21);
+    INPUT_CASE(22);
+    INPUT_CASE(23);
+    INPUT_CASE(24);
+    INPUT_CASE(25);
+    INPUT_CASE(26);
+    INPUT_CASE(27);
+    INPUT_CASE(28);
+    INPUT_CASE(29);
+    default:  // No instantiation exists, so nothing was checked: say so.
+      FAIL() << "Input integer bits not handled: " << input_integer_bits;
+  }
+#undef INPUT_CASE
+}
+
+// Routes a runtime output_integer_bits in [0, 29] to RunSingleTest<K>.
+void RunSingleTest(const std::vector<int32>& test_input, int input_integer_bits,
+                   int output_integer_bits, const string& check_label,
+                   int tolerance) {
+#define OUTPUT_CASE(K)                                                   \
+  case K:                                                                \
+    return RunSingleTest<K>(test_input, input_integer_bits, check_label, \
+                            tolerance)
+  switch (output_integer_bits) {
+    OUTPUT_CASE(0);
+    OUTPUT_CASE(1);
+    OUTPUT_CASE(2);
+    OUTPUT_CASE(3);
+    OUTPUT_CASE(4);
+    OUTPUT_CASE(5);
+    OUTPUT_CASE(6);
+    OUTPUT_CASE(7);
+    OUTPUT_CASE(8);
+    OUTPUT_CASE(9);
+    OUTPUT_CASE(10);
+    OUTPUT_CASE(11);
+    OUTPUT_CASE(12);
+    OUTPUT_CASE(13);
+    OUTPUT_CASE(14);
+    OUTPUT_CASE(15);
+    OUTPUT_CASE(16);
+    OUTPUT_CASE(17);
+    OUTPUT_CASE(18);
+    OUTPUT_CASE(19);
+    OUTPUT_CASE(20);
+    OUTPUT_CASE(21);
+    OUTPUT_CASE(22);
+    OUTPUT_CASE(23);
+    OUTPUT_CASE(24);
+    OUTPUT_CASE(25);
+    OUTPUT_CASE(26);
+    OUTPUT_CASE(27);
+    OUTPUT_CASE(28);
+    OUTPUT_CASE(29);
+    default:  // The value switched on is the output bit count, so report that.
+      FAIL() << "Output integer bits not handled: " << output_integer_bits;
+  }
+#undef OUTPUT_CASE
+}
+
+// Tests random raw inputs, with the smallest and largest values pinned in.
+void RunUniformTest(int test_size, int input_integer_bits,
+                    int output_integer_bits, const string& check_label,
+                    int tolerance, NumberGenerator* generator) {
+  constexpr int kMax = std::numeric_limits<int32>::max();
+  const int edge_values[] = {2, 3, 4, kMax - 2, kMax - 1, kMax};
+  // The edge values overwrite the first six slots, so they must exist.
+  ASSERT_GE(test_size, 6) << "test_size too small for the pinned edge values";
+  std::vector<int> test_data = generator->RandomIntVector(
+      test_size, 2, std::numeric_limits<int>::max() - 1);
+  std::copy(std::begin(edge_values), std::end(edge_values), test_data.begin());
+
+  RunSingleTest(test_data, input_integer_bits, output_integer_bits,
+                check_label + " / uniform test", tolerance);
+}
+
+// Tests random raw inputs after shifting each right by a random 0..29 bits.
+void RunUniformShiftUniformTest(int test_size, int input_integer_bits,
+                                int output_integer_bits,
+                                const string& check_label, int tolerance,
+                                NumberGenerator* generator) {
+  constexpr int kLargest = std::numeric_limits<int>::max() - 1;
+  std::vector<int> values = generator->RandomIntVector(test_size, 2, kLargest);
+  const std::vector<int> shifts = generator->RandomIntVector(test_size, 0, 29);
+  RightShiftVector(shifts, &values);
+  RunSingleTest(values, input_integer_bits, output_integer_bits,
+                check_label + " / shifted test", tolerance);
+}
+
+// Sweeps 250 random pairings of input and output integer bits, running both
+// the uniform and the shifted test on each pairing.
+TEST_F(LogQuantizedTest, VariedIntegerBits) {
+  constexpr int kVariations = 250;
+  constexpr int kRunSize = 250;
+  constexpr int kIntegerTolerance = 8;
+  constexpr double kOutputFloatTolerance = 7.0e-7;
+
+  const std::vector<int> in_bits =
+      generator_.RandomIntVector(kVariations, 0, 24);
+  const std::vector<int> out_bits =
+      generator_.RandomIntVector(kVariations, 1, 10);
+
+  for (int v = 0; v < kVariations; ++v) {
+    // Raw-unit float tolerance, but never tighter than the integer floor.
+    const double scaled = (1 << (31 - out_bits[v])) * kOutputFloatTolerance;
+    const int tolerance = std::max(1.0 * kIntegerTolerance, scaled);
+
+    RunUniformTest(kRunSize, in_bits[v], out_bits[v], "VariedIntegerBits",
+                   tolerance, &generator_);
+    RunUniformShiftUniformTest(kRunSize, in_bits[v], out_bits[v],
+                               "VariedIntegerBits", tolerance, &generator_);
+  }
+}
+
+// One large run at a fixed pairing of 12 input and 5 output integer bits.
+TEST_F(LogQuantizedTest, SelectedIntegerBits) {
+  constexpr int kInputBits = 12;
+  constexpr int kOutputBits = 5;
+  constexpr int kRunSize = 100000;
+  constexpr int kIntegerTolerance = 4;
+  NumberGenerator* const gen = &generator_;
+  RunUniformTest(kRunSize, kInputBits, kOutputBits, "SelectedIntegerBits",
+                 kIntegerTolerance, gen);
+  RunUniformShiftUniformTest(kRunSize, kInputBits, kOutputBits,
+                             "SelectedIntegerBits", kIntegerTolerance, gen);
+}
+
+}  // namespace
diff --git a/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc
new file mode 100644
index 0000000..b7531ea
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/internal/logsoftmax_quantized_test.cc
@@ -0,0 +1,241 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <random>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/test_util.h"
+
+namespace tflite {
+namespace {
+
+// Builds the expected uint8 output by dequantizing the input, running the
+// float LogSoftmax on it, and requantizing.  stride and beta are not used.
+void RunLogSoftmaxFloatReference(const uint8* input_data,
+                                 const Dims<4>& dims_common, int32 input_offset,
+                                 const double input_scale, int stride,
+                                 float beta, uint8* reference_output_data) {
+  const int num_elements = RequiredBufferSizeForDims(dims_common);
+  std::vector<float> dequantized(num_elements);
+  std::vector<float> float_result(num_elements);
+
+  reference_ops::Dequantize(input_data, dims_common, input_offset, input_scale,
+                            dequantized.data(), dims_common);
+  optimized_ops::LogSoftmax(dequantized.data(), dims_common,
+                            float_result.data(), dims_common);
+  // Quantized LogSoftmax scaling: 255 stands for 0 and each step is 1/16, so
+  // anything that would land below 0 (i.e. under about -16) is raised to 0.
+  uint8* out = reference_output_data;
+  for (const float value : float_result) {
+    const int quantized = static_cast<int>(255 + std::round(16.0f * value));
+    *out++ = std::max(0, quantized);
+  }
+}
+
+void CheckOutputData(const uint8* test_output, const uint8* reference_output,
+                     const Dims<4>& dims_common, const string& check_label,
+                     bool be_exacting) {  // true: bit-exact; false: off-by-one.
+  const int buffer_size = RequiredBufferSizeForDims(dims_common);
+  // Differences are taken in quantized units (test minus reference), even
+  // though some of the summary metrics below are computed in floating point.
+  std::vector<int> diff(buffer_size);
+  int64_t sum_diff = 0;
+  int64_t sum_abs_diff = 0;
+  for (int i = 0; i < buffer_size; i++) {
+    diff[i] = static_cast<int>(test_output[i]) - reference_output[i];
+    sum_diff += diff[i];
+    sum_abs_diff += std::abs(diff[i]);
+  }
+  // Sorting exposes min, max and median; these stats help understand failures.
+  std::sort(std::begin(diff), std::end(diff));
+  const int min_diff = diff.front();  // NOTE(review): assumes buffer_size > 0.
+  const int max_diff = diff.back();
+  const int median_diff = diff[diff.size() / 2];
+  const float mean_diff = static_cast<float>(sum_diff) / buffer_size;
+  const float mean_abs_diff = static_cast<float>(sum_abs_diff) / buffer_size;
+  // We either check for bit exactness (against the reference quantized version)
+  // or for general accuracy, allowing off-by-one (against the float reference).
+  if (be_exacting) {
+    ASSERT_TRUE(std::abs(min_diff) == 0 && std::abs(max_diff) == 0)
+        << check_label << ": "
+        << "std::abs(min_diff)=" << std::abs(min_diff)
+        << ", std::abs(max_diff)=" << std::abs(max_diff);
+  } else {
+    // For small numbers of samples, the estimates of the means vary more.
+    // Rather than widen the tolerances, the mean checks are skipped below 10000.
+    ASSERT_TRUE(((std::abs(mean_diff) < 2e-2f && mean_abs_diff < 3e-2f) ||
+                 buffer_size < 10000) &&
+                std::abs(median_diff) == 0 && std::abs(min_diff) <= 1 &&
+                std::abs(max_diff) <= 1)
+        << check_label << ": "
+        << "buffer_size=" << buffer_size << ", mean_diff=" << mean_diff
+        << ", mean_abs_diff=" << mean_abs_diff
+        << ", median_diff=" << median_diff << ", min_diff=" << min_diff
+        << ", max_diff=" << max_diff;
+  }
+}
+
+// Runs the optimized and the reference quantized LogSoftmax on one input and
+// checks them against each other and against the expectation obtained from
+// the float implementation.
+void RunOneLogSoftmaxTest(const uint8* input_data, const Dims<4>& dims_common,
+                          int32 input_offset, const double input_scale,
+                          int stride, float beta) {
+  const int num_elements = RequiredBufferSizeForDims(dims_common);
+  std::vector<uint8> optimized_out(num_elements);
+  std::vector<uint8> float_ref_out(num_elements);
+  std::vector<uint8> quant_ref_out(num_elements);
+
+  // Expected values obtained through the float implementation.
+  RunLogSoftmaxFloatReference(input_data, dims_common, input_offset,
+                              input_scale, stride, beta, float_ref_out.data());
+
+  // Fixed-point parameters shared by both quantized kernels.
+  static const int kScaledDiffIntegerBits = 5;
+  int32 beta_multiplier;
+  int beta_left_shift;
+  int32 reverse_divisor;
+  int reverse_right_shift;
+  tflite::PreprocessLogSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+                                      &beta_multiplier, &beta_left_shift,
+                                      &reverse_divisor, &reverse_right_shift);
+  // diff_min is negative; it bounds the magnitude of the diffs, all of which
+  // are <= 0.
+  const int diff_min =
+      -tflite::CalculateInputRadius(kScaledDiffIntegerBits, beta_left_shift);
+
+  optimized_ops::LogSoftmax(input_data, dims_common, beta_multiplier,
+                            beta_left_shift, reverse_divisor,
+                            reverse_right_shift, diff_min, optimized_out.data(),
+                            dims_common);
+  reference_ops::LogSoftmax(input_data, dims_common, beta_multiplier,
+                            beta_left_shift, reverse_divisor,
+                            reverse_right_shift, diff_min, quant_ref_out.data(),
+                            dims_common);
+
+  // Only the two quantized kernels are required to agree bit for bit.
+  CheckOutputData(optimized_out.data(), float_ref_out.data(), dims_common,
+                  "Optimized vs float reference", false);
+  CheckOutputData(optimized_out.data(), quant_ref_out.data(), dims_common,
+                  "Optimized vs quant reference", true);
+  CheckOutputData(quant_ref_out.data(), float_ref_out.data(), dims_common,
+                  "Quant reference vs float reference", false);
+}
+
+// Draws random LogSoftmax parameters and checks them for desirability.  If
+// they are acceptable, one test is run on random input and true is returned;
+// otherwise false is returned, which lets the caller loop until a test has
+// actually been run.
+//
+// At present no parameter set is rejected.
+bool TryOneUniformLogSoftmax() {
+  // Mostly positive values are picked, emphasizing smaller ones for faster
+  // tests; depths cover a wider range.  For LogSoftmax the width and height
+  // really just create test repetitions.
+  const int num_batches = ExponentialRandomPositiveInt(0.9f, 3, 20);
+  const int depth = ExponentialRandomPositiveInt(0.75f, 175, 500);
+  const int width = ExponentialRandomPositiveInt(0.8f, 20, 200);
+  const int height = ExponentialRandomPositiveInt(0.8f, 20, 200);
+  const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8);
+  const double scale = std::pow(10.0, UniformRandomFloat(-2.0, 1.0));
+  const int32 zero_offset = UniformRandomInt(-256, 0);
+  constexpr float kBeta = 1.0f;
+
+  Dims<4> dims =
+      MakeDimsForInference(depth, width, height, num_batches);
+  const int num_elements = RequiredBufferSizeForDims(dims);
+
+  std::vector<uint8> input(num_elements);
+  FillRandom(&input);
+  RunOneLogSoftmaxTest(input.data(), dims, zero_offset, scale, stride,
+                       kBeta);
+  return true;
+}
+
+// Same contract as TryOneUniformLogSoftmax(), but with "skyscraper" inputs.
+//
+// Skyscraper patterns are tested for two reasons. (a) Bimodal distributions
+// are potentially challenging and perhaps more realistic than simple uniform
+// random inputs.  (b) Some LogSoftmax implementations may adapt as they
+// traverse the depth, so we cover relatively small values being encountered
+// at the beginning and end.
+bool TryOneSkyscraperLogSoftmax(bool small_depth) {
+  // Mostly positive values are picked, emphasizing smaller ones for faster
+  // tests; depths cover a wider range.  For LogSoftmax the width and height
+  // really just create test repetitions.
+  const int num_batches = ExponentialRandomPositiveInt(0.9f, 3, 20);
+  // The small-depth variant only changes the second depth argument.
+  const int depth_arg = small_depth ? 40 : 175;
+  const int depth = ExponentialRandomPositiveInt(0.75f, depth_arg, 500);
+  const int width = ExponentialRandomPositiveInt(0.7f, 20, 200);
+  const int height = ExponentialRandomPositiveInt(0.7f, 20, 200);
+  const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8);
+  const double scale = std::pow(10.0, UniformRandomFloat(-2.0, 1.0));
+  const int32 zero_offset = UniformRandomInt(-256, 0);
+  constexpr float kBeta = 1.0f;
+  // Extra parameters for skyscraper input patterns.
+  const double middle_proportion =
+      ExponentialRandomPositiveFloat(0.65f, 0.1, 1.0);
+  const int middle_min = UniformRandomInt(0, 255);
+  const int sides_max = UniformRandomInt(0, middle_min);
+
+  Dims<4> dims =
+      MakeDimsForInference(depth, width, height, num_batches);
+  const int num_elements = RequiredBufferSizeForDims(dims);
+
+  std::vector<uint8> input(num_elements);
+  FillRandomSkyscraper(&input, depth, middle_proportion, middle_min,
+                       sides_max);
+  RunOneLogSoftmaxTest(input.data(), dims, zero_offset, scale, stride,
+                       kBeta);
+  return true;
+}
+
+// Completes 1000 uniform-input cases, retrying whenever an attempt is false.
+TEST(TestQuantizedLogSoftmax, UniformLogSoftmaxTests) {
+  constexpr int kTestsToRun = 1000;
+  for (int completed = 0; completed < kTestsToRun;) {
+    if (TryOneUniformLogSoftmax()) ++completed;
+  }
+}
+
+// Completes 1000 skyscraper cases with small_depth set to false.
+TEST(TestQuantizedLogSoftmax, SkyscraperLogSoftmaxTests) {
+  constexpr int kTestsToRun = 1000;
+  for (int completed = 0; completed < kTestsToRun;) {
+    if (TryOneSkyscraperLogSoftmax(false)) ++completed;
+  }
+}
+
+// Completes 1000 skyscraper cases with small_depth set to true.
+TEST(TestQuantizedLogSoftmax, SmallSkyscraperLogSoftmaxTests) {
+  constexpr int kTestsToRun = 1000;
+  for (int completed = 0; completed < kTestsToRun;) {
+    if (TryOneSkyscraperLogSoftmax(true)) ++completed;
+  }
+}
+}  // namespace
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/internal/resize_bilinear_float_test.cc b/tensorflow/contrib/lite/kernels/internal/resize_bilinear_float_test.cc
new file mode 100644
index 0000000..c1c50df
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/internal/resize_bilinear_float_test.cc
@@ -0,0 +1,102 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/test_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/types.h"
+
+namespace tflite {
+namespace {
+// Checks optimized float ResizeBilinear against the reference on one input:
+// mean absolute difference over the largest reference magnitude must be < 1e-5.
+void TestOneResizeBilinear(int batch, int depth, int input_width,
+                           int input_height, int output_width,
+                           int output_height) {
+  Dims<4> in_dims =
+      MakeDimsForInference(depth, input_width, input_height, batch);
+  Dims<4> out_dims =
+      MakeDimsForInference(depth, output_width, output_height, batch);
+  const int in_count = RequiredBufferSizeForDims(in_dims);
+  const int out_count = RequiredBufferSizeForDims(out_dims);
+
+  // Input is filled by FillRandom with the bounds -1 and 1.
+  const float input_amplitude = 1.f;
+  std::vector<float> input(in_count, 0);
+  FillRandom(&input, -input_amplitude, input_amplitude);
+
+  std::vector<float> expected(out_count, 0);
+  // The buffer under test starts out non-zero so that a kernel failing to
+  // write its output is caught.
+  std::vector<float> actual(out_count, 3.1415);
+
+  // The requested size is passed as two int32 values: {height, width}.
+  Dims<4> size_dims = MakeDimsForInference(2, 1, 1, 1);
+  std::vector<int32> size_data = {output_height, output_width};
+
+  reference_ops::ResizeBilinear(input.data(), in_dims, size_data.data(),
+                                size_dims, expected.data(), out_dims);
+  optimized_ops::ResizeBilinear(input.data(), in_dims, size_data.data(),
+                                size_dims, actual.data(), out_dims);
+
+  double sum_diff = 0;
+  float max_abs_val = 0;
+  for (int i = 0; i < out_count; ++i) {
+    const float want = expected[i];
+    sum_diff += std::abs(actual[i] - want);
+    max_abs_val = std::max(max_abs_val, std::abs(want));
+  }
+  if (sum_diff == 0.f) return;  // Identical outputs: nothing more to check.
+
+  const float mean_diff = static_cast<float>(sum_diff / out_count);
+  const float relative_error = std::abs(mean_diff) / max_abs_val;
+  ASSERT_LT(relative_error, 1e-5f);
+}
+
+// Runs 100,000 cases with independently drawn input and output sizes.
+TEST(ResizeBilinear, TestResizeBilinear) {
+  constexpr int kTestsToRun = 100 * 1000;
+  for (int run = 0; run < kTestsToRun; ++run) {
+    const int num_batches = ExponentialRandomPositiveInt(0.9f, 3, 20);
+    const int channels = ExponentialRandomPositiveInt(0.9f, 6, 50);
+    const int in_w = ExponentialRandomPositiveInt(0.9f, 20, 200);
+    const int in_h = ExponentialRandomPositiveInt(0.9f, 20, 200);
+    const int out_w = ExponentialRandomPositiveInt(0.9f, 20, 200);
+    const int out_h = ExponentialRandomPositiveInt(0.9f, 20, 200);
+
+    TestOneResizeBilinear(num_batches, channels, in_w, in_h, out_w, out_h);
+  }
+}
+
+// Runs 100,000 cases in which the output is exactly twice the input size.
+TEST(ResizeBilinear2x2, TestResizeBilinear) {
+  constexpr int kTestsToRun = 100 * 1000;
+  for (int run = 0; run < kTestsToRun; ++run) {
+    const int num_batches = ExponentialRandomPositiveInt(0.9f, 3, 20);
+    const int channels = ExponentialRandomPositiveInt(0.9f, 6, 50);
+    const int in_w = ExponentialRandomPositiveInt(0.9f, 20, 200);
+    const int in_h = ExponentialRandomPositiveInt(0.9f, 20, 200);
+    const int out_w = in_w * 2;
+    const int out_h = in_h * 2;
+
+    TestOneResizeBilinear(num_batches, channels, in_w, in_h, out_w, out_h);
+  }
+}
+}  // namespace
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc b/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc
new file mode 100644
index 0000000..d781a7b
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/internal/softmax_quantized_test.cc
@@ -0,0 +1,227 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <algorithm>
+#include <cmath>
+#include <cstdlib>
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <random>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h"
+#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h"
+#include "tensorflow/contrib/lite/kernels/internal/test_util.h"
+
+namespace tflite {
+namespace {
+
+void RunSoftmaxFloatReference(const uint8* input_data,
+                              const Dims<4>& dims_common, int32 input_offset,
+                              const double input_scale, int stride, float beta,
+                              uint8* reference_output_data) {
+  const int num_elements = RequiredBufferSizeForDims(dims_common);
+  std::vector<float> dequantized(num_elements);
+  std::vector<float> softmax_float(num_elements);
+
+  // Reference path: dequantize the input to float, then run the float Softmax.
+  // (|stride| is accepted but not used by this function.)
+  reference_ops::Dequantize(input_data, dims_common, input_offset, input_scale,
+                            dequantized.data(), dims_common);
+  optimized_ops::Softmax(dequantized.data(), dims_common, beta,
+                         softmax_float.data(), dims_common);
+  // Requantize with the Softmax output scaling, under which 256 represents 1;
+  // results are capped at 255.
+  std::transform(softmax_float.begin(), softmax_float.end(),
+                 reference_output_data, [](float p) {
+                   const int scaled = static_cast<int>(std::round(256.0f * p));
+                   return std::min(255, scaled);
+                 });
+}
+
+// Compares |test_output| with |reference_output| and fails the test, printing
+// |check_label| and the diff statistics, if they differ by more than allowed.
+void CheckOutputData(const uint8* test_output, const uint8* reference_output,
+                     const Dims<4>& dims_common, const string& check_label,
+                     bool be_exacting) {
+  const int buffer_size = RequiredBufferSizeForDims(dims_common);
+  // Diffs are kept in quantized units; only the means are floating point.
+  std::vector<int> diff(buffer_size);
+  int64_t sum_diff = 0;
+  int64_t sum_abs_diff = 0;
+  for (int i = 0; i < buffer_size; i++) {
+    diff[i] = static_cast<int>(test_output[i]) - reference_output[i];
+    sum_diff += diff[i];
+    sum_abs_diff += std::abs(diff[i]);
+  }
+  std::sort(std::begin(diff), std::end(diff));
+  const int min_diff = diff.front();
+  const int max_diff = diff.back();
+  const int median_diff = diff[diff.size() / 2];
+  const float mean_diff = static_cast<float>(sum_diff) / buffer_size;
+  const float mean_abs_diff = static_cast<float>(sum_abs_diff) / buffer_size;
+  // With |be_exacting| (quantized reference) the outputs must be bit exact.
+  // Otherwise (float reference) off-by-one is allowed; the mean checks are
+  // skipped for small buffers, whose mean estimates vary too much.
+  const bool means_ok = buffer_size < 10000 ||
+                        (std::abs(mean_diff) < 2e-2f && mean_abs_diff < 3e-2f);
+  const bool within_one = std::abs(min_diff) <= 1 && std::abs(max_diff) <= 1;
+  const bool ok = be_exacting ? (min_diff == 0 && max_diff == 0)
+                              : (means_ok && median_diff == 0 && within_one);
+  ASSERT_TRUE(ok) << check_label << ": min_diff=" << min_diff
+                  << " max_diff=" << max_diff << " median_diff=" << median_diff
+                  << " mean_diff=" << mean_diff
+                  << " mean_abs_diff=" << mean_abs_diff;
+}
+
+// Runs the optimized quantized Softmax on |input_data| and checks it against
+// the float reference (off-by-one allowed) and the quantized reference (bit
+// exact); the two references are also compared with each other.
+void RunOneSoftmaxTest(const uint8* input_data, const Dims<4>& dims_common,
+                       int32 input_offset, const double input_scale, int stride,
+                       float beta) {
+  const int num_elements = RequiredBufferSizeForDims(dims_common);
+  std::vector<uint8> optimized_out(num_elements);
+  std::vector<uint8> float_ref_out(num_elements);
+  std::vector<uint8> quant_ref_out(num_elements);
+
+  RunSoftmaxFloatReference(input_data, dims_common, input_offset, input_scale,
+                           stride, beta, float_ref_out.data());
+
+  // Derive the quantized Softmax parameters from |beta| and |input_scale|.
+  static const int kScaledDiffIntegerBits = 5;
+  int32 beta_multiplier;
+  int beta_left_shift;
+  tflite::PreprocessSoftmaxScaling(beta, input_scale, kScaledDiffIntegerBits,
+                                   &beta_multiplier, &beta_left_shift);
+
+  // diff_min is negative; it limits the maximum magnitude of the diffs, which
+  // are <= 0.
+  const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits,
+                                                     beta_left_shift);
+
+  optimized_ops::Softmax(input_data, dims_common, beta_multiplier,
+                         beta_left_shift, diff_min, optimized_out.data(),
+                         dims_common);
+  reference_ops::Softmax(input_data, dims_common, beta_multiplier,
+                         beta_left_shift, diff_min, quant_ref_out.data(),
+                         dims_common);
+
+  // Only the optimized-vs-quantized comparison must be bit exact.
+  CheckOutputData(optimized_out.data(), float_ref_out.data(), dims_common,
+                  "Optimized vs float reference", false);
+  CheckOutputData(optimized_out.data(), quant_ref_out.data(), dims_common,
+                  "Optimized vs quant reference", true);
+  CheckOutputData(quant_ref_out.data(), float_ref_out.data(), dims_common,
+                  "Quant reference vs float reference", false);
+}
+
+// Picks random Softmax parameters and runs one test on uniformly random input.
+// Parameters deemed undesirable would make it return false without testing;
+// on success it returns true, so that a caller can loop until a test has
+// actually been run.
+//
+// Currently we do not reject for any reason.
+bool TryOneUniformSoftmax() {
+  // Values are mostly small, for faster tests, with a wider range of depths.
+  // In the case of Softmax, the width and height really just create test
+  // repetitions.
+  const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20);
+  const int input_depth = ExponentialRandomPositiveInt(0.75f, 175, 500);
+  const int input_width = ExponentialRandomPositiveInt(0.8f, 20, 200);
+  const int input_height = ExponentialRandomPositiveInt(0.8f, 20, 200);
+  const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8);
+  const double input_scale = std::pow(10.0, UniformRandomFloat(-2.0, 1.0));
+  const int32 input_offset = UniformRandomInt(-256, 0);
+  const float beta = 1.0f + ExponentialRandomPositiveFloat(0.9f, 2, 10);
+
+  Dims<4> dims =
+      MakeDimsForInference(input_depth, input_width, input_height, batch);
+  // Uniformly random input over the whole uint8 range.
+  std::vector<uint8> input(RequiredBufferSizeForDims(dims));
+  FillRandom(&input);
+
+  RunOneSoftmaxTest(input.data(), dims, input_offset, input_scale, stride,
+                    beta);
+  return true;
+}
+
+// See TryOneUniformSoftmax() for a general description.
+//
+// "Skyscraper" input patterns are tested for two reasons. (a) Bimodal
+// distributions are potentially challenging and perhaps more realistic than
+// simple uniform random inputs. (b) Some implementations of Softmax may adapt
+// as they traverse the depth, so we cover relatively small values being
+// encountered at the beginning and end.
+bool TryOneSkyscraperSoftmax(bool small_depth) {
+  // Values are mostly small, for faster tests, with a wider range of depths.
+  // In the case of Softmax, the width and height really just create test
+  // repetitions.
+  const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20);
+  // |small_depth| lowers the value that 75% of the raw depth draws stay under.
+  const int depth_p75 = small_depth ? 40 : 175;
+  const int input_depth = ExponentialRandomPositiveInt(0.75f, depth_p75, 500);
+  const int input_width = ExponentialRandomPositiveInt(0.7f, 20, 200);
+  const int input_height = ExponentialRandomPositiveInt(0.7f, 20, 200);
+  const int stride = ExponentialRandomPositiveInt(0.9f, 3, 8);
+  const double input_scale = std::pow(10.0, UniformRandomFloat(-2.0, 1.0));
+  const int32 input_offset = UniformRandomInt(-256, 0);
+  const float beta = 1.0f + ExponentialRandomPositiveFloat(0.9f, 2, 10);
+  // Extra parameters for skyscraper input patterns: the share of the depth
+  // taken by the middle section, and the value bounds of middle and sides.
+  const double middle_proportion =
+      ExponentialRandomPositiveFloat(0.65f, 0.1, 1.0);
+  const int middle_min = UniformRandomInt(0, 255);
+  const int sides_max = UniformRandomInt(0, middle_min);
+
+  Dims<4> dims =
+      MakeDimsForInference(input_depth, input_width, input_height, batch);
+  std::vector<uint8> input(RequiredBufferSizeForDims(dims));
+  FillRandomSkyscraper(&input, input_depth, middle_proportion, middle_min,
+                       sides_max);
+
+  RunOneSoftmaxTest(input.data(), dims, input_offset, input_scale, stride,
+                    beta);
+  return true;
+}
+
+TEST(TestQuantizedSoftmax, UniformSoftmaxTests) {
+  // Keep trying until the required number of tests has actually been run.
+  constexpr int kTestsToRun = 1000;
+  for (int tests_run = 0; tests_run < kTestsToRun;) {
+    if (TryOneUniformSoftmax()) ++tests_run;
+  }
+}
+
+TEST(TestQuantizedSoftmax, SkyscraperSoftmaxTests) {
+  // Keep trying until the required number of tests has actually been run.
+  constexpr int kTestsToRun = 1000;
+  for (int tests_run = 0; tests_run < kTestsToRun;) {
+    if (TryOneSkyscraperSoftmax(false)) ++tests_run;
+  }
+}
+
+TEST(TestQuantizedSoftmax, SmallSkyscraperSoftmaxTests) {
+  // Keep trying until the required number of tests has actually been run.
+  constexpr int kTestsToRun = 1000;
+  for (int tests_run = 0; tests_run < kTestsToRun;) {
+    if (TryOneSkyscraperSoftmax(true)) ++tests_run;
+  }
+}
+}  // namespace
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/internal/test_util.cc b/tensorflow/contrib/lite/kernels/internal/test_util.cc
new file mode 100644
index 0000000..9b1fd9b
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/internal/test_util.cc
@@ -0,0 +1,121 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/kernels/internal/test_util.h"
+
+#include <cmath>
+#include <iterator>
+
+namespace tflite {
+
+// Returns a Dims<4> with sizes {depth, width, height, batch} (index 0 first)
+// and the strides of a densely packed buffer of that shape.
+Dims<4> MakeDimsForInference(int depth, int width, int height, int batch) {
+  Dims<4> dims;
+
+  // Sizes are stored with depth at index 0 and batch at index 3.
+  dims.sizes[0] = depth;
+  dims.sizes[1] = width;
+  dims.sizes[2] = height;
+  dims.sizes[3] = batch;
+
+  // Each stride is the product of all the sizes before it, so depth varies
+  // fastest and batch slowest.
+  const int row_elements = depth * width;
+  dims.strides[0] = 1;
+  dims.strides[1] = depth;
+  dims.strides[2] = row_elements;
+  dims.strides[3] = row_elements * height;
+
+  return dims;
+}
+
+// Computes the output dims and the padding of a convolution over |input_dims|.
+// This is a copy of an internal function in propagate_fixed_sizes.cc.
+// Returns false, leaving the out-params untouched, for an unsupported padding
+// type or when a computed output dimension is not positive.
+bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width,
+                      int filter_height, int stride, PaddingType padding_type,
+                      Dims<4>* output_dims, int* pad_width, int* pad_height) {
+  const int in_w = ArraySize(input_dims, 1);
+  const int in_h = ArraySize(input_dims, 2);
+  const int batch = ArraySize(input_dims, 3);
+
+  const bool is_valid = padding_type == PaddingType::kValid;
+  if (!is_valid && padding_type != PaddingType::kSame) {
+    return false;
+  }
+
+  // kValid fits the whole filter inside the input; kSame uses the same
+  // formula with a 1x1 extent.
+  const int extent_w = is_valid ? filter_width : 1;
+  const int extent_h = is_valid ? filter_height : 1;
+  const int out_w = (in_w + stride - extent_w) / stride;
+  const int out_h = (in_h + stride - extent_h) / stride;
+  if (out_w <= 0 || out_h <= 0) {
+    return false;
+  }
+
+  *pad_height = ((out_h - 1) * stride + filter_height - in_h) / 2;
+  *pad_width = ((out_w - 1) * stride + filter_width - in_w) / 2;
+  *output_dims = MakeDimsForInference(output_depth, out_w, out_h, batch);
+  return true;
+}
+
+std::mt19937& RandomEngine() {
+  static std::mt19937 engine;  // Default-constructed: no explicit seed is set.
+  return engine;  // Every call returns this same function-local instance.
+}
+
+int UniformRandomInt(int min, int max) {
+  // Both bounds are inclusive; draws advance the shared engine.
+  return std::uniform_int_distribution<int>(min, max)(RandomEngine());
+}
+
+float UniformRandomFloat(float min, float max) {
+  // Nominally half-open, [min, max); draws advance the shared engine.
+  return std::uniform_real_distribution<float>(min, max)(RandomEngine());
+}
+
+int ExponentialRandomPositiveInt(float percentile, int percentile_val,
+                                 int max_val) {
+  // Rate chosen so that P(raw draw <= percentile_val) == percentile.
+  const float rate = -std::log(1.f - percentile) / percentile_val;
+  std::exponential_distribution<float> distribution(rate);
+  for (;;) {  // Redraw until the sample is non-zero, finite and <= max_val.
+    const float draw = distribution(RandomEngine());
+    const bool usable = draw != 0.f && std::isfinite(draw) && draw <= max_val;
+    if (usable) return static_cast<int>(std::ceil(draw));  // Rounded up.
+  }
+}
+
+float ExponentialRandomPositiveFloat(float percentile, float percentile_val,
+                                     float max_val) {
+  // Rate chosen so that P(raw draw <= percentile_val) == percentile.
+  const float rate = -std::log(1.f - percentile) / percentile_val;
+  std::exponential_distribution<float> distribution(rate);
+  for (;;) {  // Redraw until the sample is finite and <= max_val.
+    const float draw = distribution(RandomEngine());
+    // A draw of exactly zero is accepted here, despite "Positive" in the name.
+    if (std::isfinite(draw) && draw <= max_val) return draw;
+  }
+}
+
+void FillRandom(std::vector<float>* vec, float min, float max) {
+  std::uniform_real_distribution<float> dist(min, max);
+  // Use the shared engine directly; std::bind would draw from a copy of it.
+  for (float& value : *vec) value = dist(RandomEngine());
+}
+
+}  // namespace tflite
diff --git a/tensorflow/contrib/lite/kernels/internal/test_util.h b/tensorflow/contrib/lite/kernels/internal/test_util.h
new file mode 100644
index 0000000..26078ce
--- /dev/null
+++ b/tensorflow/contrib/lite/kernels/internal/test_util.h
@@ -0,0 +1,104 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TEST_UTIL_H_
+#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TEST_UTIL_H_
+
+#include <algorithm>
+#include <functional>
+#include <iterator>
+#include <limits>
+#include <random>
+#include <vector>
+
+#include "tensorflow/contrib/lite/kernels/internal/types.h"
+
+namespace tflite {
+
+// Creates a Dims struct from a set of dimensions.
+Dims<4> MakeDimsForInference(int depth, int width, int height, int batch);
+
+// Computes output and padding dimensions.
+bool ComputeConvSizes(Dims<4> input_dims, int output_depth, int filter_width,
+                      int filter_height, int stride, PaddingType padding_type,
+                      Dims<4>* output_dims, int* pad_width, int* pad_height);
+
+// Returns a mt19937 random engine.
+std::mt19937& RandomEngine();
+
+// Returns a random integer uniformly distributed in [|min|, |max|], inclusive.
+int UniformRandomInt(int min, int max);
+
+// Returns a random float uniformly distributed in [|min|, |max|).
+float UniformRandomFloat(float min, float max);
+
+// Returns a uniformly chosen element of |v|, which must not be empty.
+template <typename T>
+const T& RandomElement(const std::vector<T>& v) {
+  return v[UniformRandomInt(0, v.size() - 1)];
+}
+
+// Returns a random exponentially distributed integer in [1, |max_val|].
+int ExponentialRandomPositiveInt(float percentile, int percentile_val,
+                                 int max_val);
+
+// Returns a random exponentially distributed float in [0, |max_val|].
+float ExponentialRandomPositiveFloat(float percentile, float percentile_val,
+                                     float max_val);
+
+// Fills a vector with random floats between |min| and |max|.
+void FillRandom(std::vector<float>* vec, float min, float max);
+
+// Fills |vec| with uniform random integers in [|min|, |max|].
+template <typename T>
+void FillRandom(std::vector<T>* vec, T min, T max) {
+  // Promoted type: 8-bit T is unsupported. Shared engine, not a std::bind copy.
+  std::uniform_int_distribution<decltype(+min)> dist(min, max);
+  for (auto&& value : *vec) value = static_cast<T>(dist(RandomEngine()));
+}
+
+// Fills a vector with random numbers in [numeric_limits<T>::min(), ::max()].
+template <typename T>
+void FillRandom(std::vector<T>* vec) {
+  FillRandom(vec, std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
+}
+
+// Fills [begin_it, end_it) with uniform random integers in [|min|, |max|].
+template <typename T>
+void FillRandom(typename std::vector<T>::iterator begin_it,
+                typename std::vector<T>::iterator end_it, T min, T max) {
+  std::uniform_int_distribution<decltype(+min)> dist(min, max);  // widen 8-bit
+  std::generate(begin_it, end_it, [&]() -> T { return dist(RandomEngine()); });
+}
+
+// Fills each run of |depth| elements with a "skyscraper" pattern: a central
+// section (across the depth) with higher values than the sections beside it.
+template <typename T>
+void FillRandomSkyscraper(std::vector<T>* vec, int depth,
+                          double middle_proportion, uint8 middle_min,
+                          uint8 sides_max) {
+  using Limits = std::numeric_limits<T>;
+  const auto left_len = std::ceil(0.5 * depth * (1.0 - middle_proportion));
+  const auto right_len = std::ceil(0.5 * depth * (1.0 + middle_proportion));
+  for (auto slice = vec->begin(); slice != vec->end(); slice += depth) {
+    const auto band_begin = slice + left_len;
+    const auto band_end = slice + right_len;
+    FillRandom(slice, band_begin, Limits::min(), sides_max);
+    FillRandom(band_begin, band_end, middle_min, Limits::max());
+    FillRandom(band_end, slice + depth, Limits::min(), sides_max);
+  }
+}
+
+}  // namespace tflite
+#endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_TEST_UTIL_H_
diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h
index 43c6883..d5293ed 100644
--- a/tensorflow/contrib/lite/kernels/internal/types.h
+++ b/tensorflow/contrib/lite/kernels/internal/types.h
@@ -20,6 +20,7 @@ limitations under the License.
 namespace tflite {
 
 enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu };
+enum class PaddingType { kNone, kSame, kValid };  // Conv padding scheme.
 
 // Quantization parameters, determining the mapping of quantized values
 // to real values (i.e. determining how quantized values are mathematically
-- 
2.7.4