From 31c544c8e5b04f4bcf544018fcfcacbee5294025 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 25 May 2018 22:46:46 -0700 Subject: [PATCH] DepthwiseConv Optimizations PiperOrigin-RevId: 198144118 --- .../contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h | 4 +++- .../lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index b85e6c4..3fd00c8 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1697,7 +1697,9 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, #endif TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); -#ifdef __aarch64__ +// Enable for arm64 except for the NVIDIA Linux for Tegra (L4T) running on +// Jetson TX2. The L4T compiler does not support the offsetof() macro. +#if defined(__aarch64__) && !defined(GOOGLE_L4T) // Call kernel optimized for depthwise convolutions using 3x3 filters if // parameters are supported. if (Fast3x3FilterKernelSupported( diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h index 51fbd54..8cd7223 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -23,7 +23,9 @@ limitations under the License. namespace tflite { namespace optimized_ops { -#ifdef __aarch64__ +// Enable for arm64 except for the NVIDIA Linux for Tegra (L4T) running on +// Jetson TX2. The L4T compiler does not support the offsetof() macro. 
+#if defined(__aarch64__) && !defined(GOOGLE_L4T) // clang-format gets confused with this file and ends up formatting lines to // be larger than 80 characters. Turn off here and back on at the end of the -- 2.7.4