From 67d060a8cb648a9951375a744cd99e1640436cbc Mon Sep 17 00:00:00 2001
From: Jiseob Jang/On-Device Lab(SR)/Engineer/Samsung Electronics
Date: Fri, 2 Aug 2019 10:40:52 +0900
Subject: [PATCH] Support TransposeConv op for acl_neon (#5786)

This commit supports the TransposeConv op for the acl_neon backend.
- Introduce NETransposeConvLayer
- Introduce CPPUpsampleEx
- Introduce CPPUpsampleKernelEx
- Apply NETransposeConvLayer for acl_neon backend
- Enable nnapi tests for TransposeConv

Signed-off-by: jiseob.jang
---
 .../core/CPP/kernels/CPPUpsampleKernelEx.h         |  72 +++++
 .../runtime/CPP/functions/CPPUpsampleEx.h          |  49 ++++
 .../runtime/NEON/functions/NETransposeConvLayer.h  | 162 +++++++++++
 .../src/core/CPP/kernels/CPPUpsampleKernelEx.cpp   | 102 +++++++
 .../src/runtime/CPP/functions/CPPUpsampleEx.cpp    |  37 +++
 .../NEON/functions/NETransposeConvLayer.cpp        | 307 +++++++++++++++++++++
 .../neurun/backend/acl_neon/ConstantInitializer.cc |   7 +
 .../neurun/backend/acl_neon/ConstantInitializer.h  |   1 +
 .../neurun/backend/acl_neon/KernelGenerator.cc     |  49 ++++
 runtimes/neurun/backend/acl_neon/KernelGenerator.h |   1 +
 runtimes/neurun/backend/acl_neon/ShapeFixer.cc     |   2 +
 runtimes/neurun/backend/acl_neon/ShapeFixer.h      |   1 +
 tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon |   1 -
 .../neurun_frameworktest_list.armv7l.acl_neon.txt  |   1 +
 14 files changed, 791 insertions(+), 1 deletion(-)
 create mode 100644 runtimes/libs/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h
 create mode 100644 runtimes/libs/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h
 create mode 100644 runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
 create mode 100644 runtimes/libs/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
 create mode 100644 runtimes/libs/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp
 create mode 100644 runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp

diff --git a/runtimes/libs/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h b/runtimes/libs/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h
new file mode 100644
index 0000000..d093c22
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__
+#define __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__
+
+#include "arm_compute/core/CPP/ICPPKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** CPP kernel to perform tensor upsample. */
+class CPPUpsampleKernelEx : public ICPPKernel
+{
+public:
+  const char *name() const override { return "CPPUpsampleKernelEx"; }
+  /** Default constructor */
+  CPPUpsampleKernelEx();
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CPPUpsampleKernelEx(const CPPUpsampleKernelEx &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  CPPUpsampleKernelEx &operator=(const CPPUpsampleKernelEx &) = delete;
+  /** Allow instances of this class to be moved */
+  CPPUpsampleKernelEx(CPPUpsampleKernelEx &&) = default;
+  /** Allow instances of this class to be moved */
+  CPPUpsampleKernelEx &operator=(CPPUpsampleKernelEx &&) = default;
+  /** Default destructor */
+  ~CPPUpsampleKernelEx() = default;
+
+  /** Set the input and output of the kernel.
+   *
+   * @param[in]  input  The input tensor to upsample. Data types supported: F32/F16/QASYMM8
+   * @param[out] output The output tensor. Data types supported: Same as @p input
+   * @param[in]  info   Padding info.
+   */
+  void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info);
+
+  // Inherited methods overridden:
+  void run(const Window &window, const ThreadInfo &info) override;
+  bool is_parallelisable() const override;
+
+private:
+  const ITensor *_input;
+  ITensor *_output;
+  PadStrideInfo _info;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CPPUPSAMPLEKERNEL_EX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h
new file mode 100644
index 0000000..8e7e2f9
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/CPP/functions/CPPUpsampleEx.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CPPUPSAMPLE_EX_H__
+#define __ARM_COMPUTE_CPPUPSAMPLE_EX_H__
+
+#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref CPPUpsampleKernelEx */
+class CPPUpsampleEx : public ICPPSimpleFunction
+{
+public:
+  /** Configure the upsample CPP kernel
+   *
+   * @param[in]  input  The input tensor to upsample. Data types supported: F32/F16/QASYMM8
+   * @param[out] output The output tensor. Data types supported: Same as @p input
+   * @param[in]  info   Padding information
+   */
+  void configure(const ITensor *input, ITensor *output, const PadStrideInfo &info);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CPPUPSAMPLE_EX_H__ */
diff --git a/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
new file mode 100644
index 0000000..a50b9ea
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/arm_compute/runtime/NEON/functions/NETransposeConvLayer.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
+#define __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__
+
+#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h"
+#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEPermute.h"
+
+#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Function to run the transpose convolution (deconvolution) layer.
+ *
+ * Transpose convolution is the backward pass of convolution. First we transform the input
+ * depending on the stride and pad info and then perform a 1x1 convolution pass. The input stride
+ * defines how many zeroes we should put between each element of the input, pad is the amount of
+ * padding, and finally a is a user-specified value, where a < stride - 1, that increases the
+ * padding at the top and right of the input image.
+ *
+ * The relation between input and output is as follows:
+ * \f[
+ *       width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
+ * \f]
+ * \f[
+ *       height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
+ * \f]
+ *
+ * where:
+ *      width is the size of the first input dimension.
+ *      height is the size of the second input dimension.
+ *      width_output is the size of the first output dimension.
+ *      height_output is the size of the second output dimension.
+ *      kernel_x and kernel_y are the convolution sizes in x and y.
+ *      stride_x and stride_y are the input strides of the first and second dimension.
+ *
+ * The weights used by transpose convolution are supposed to be the same as the ones used for
+ * convolution. Therefore, it is necessary to use the weights in reverse order to perform an
+ * actual convolution. This is achieved by using the @ref CPPFlipWeightsKernel.
+ *
+ * This function calls the following NEON kernels/functions:
+ *
+ * -# @ref CPPUpsampleEx
+ * -# @ref NEConvolutionLayer
+ *
+ */
+class NETransposeConvLayer : public IFunction
+{
+public:
+  /** Default constructor */
+  NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  NETransposeConvLayer(const NETransposeConvLayer &) = delete;
+  /** Prevent instances of this class from being copied (As this class contains pointers) */
+  NETransposeConvLayer &operator=(const NETransposeConvLayer &) = delete;
+  /** Allow instances of this class to be moved */
+  NETransposeConvLayer(NETransposeConvLayer &&) = default;
+  /** Allow instances of this class to be moved */
+  NETransposeConvLayer &operator=(NETransposeConvLayer &&) = default;
+  /** Default destructor */
+  virtual ~NETransposeConvLayer() = default;
+
+  /** Set the input, weights, biases and output tensors.
+   *
+   * @param[in,out] input          Input tensor. 3 lower dimensions represent a single input, and
+   *                               an optional 4th dimension for batch of inputs. Data types
+   *                               supported: F32/F16/QASYMM8.
+   * @param[in]     weights        The 4d weights with dimensions [width, height, IFM, OFM]. Data
+   *                               type supported: Same as @p input.
+   * @param[in]     bias           Optional, ignored if NULL. The biases have one dimension. Data
+   *                               types supported: S32 for QASYMM8 input, F32 for F32 input, F16
+   *                               for F16 input.
+   * @param[out]    output         Output tensor. The output has the same number of dimensions as
+   *                               the @p input.
+   * @param[in]     info           Contains padding and policies to be used in the deconvolution,
+   *                               this is described in @ref PadStrideInfo.
+   * @param[in]     invalid_right  The number of zeros added to the right edge of the output.
+   * @param[in]     invalid_bottom The number of zeros added to the bottom edge of the output.
+   *
+   */
+  void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output,
+                 const PadStrideInfo &info, unsigned int invalid_right,
+                 unsigned int invalid_bottom);
+  /** Static function to check if given info will lead to a valid configuration of @ref
+   * NETransposeConvLayer
+   *
+   * @param[in] input          Input tensor info. 3 lower dimensions represent a single input, and
+   *                           an optional 4th dimension for batch of inputs. Data types supported:
+   *                           F32/F16/QASYMM8.
+   * @param[in] weights        The 4d weights info with dimensions [width, height, IFM, OFM]. Data
+   *                           type supported: Same as @p input.
+   * @param[in] bias           (Optional) The biases have one dimension. Data types supported:
+   *                           S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
+   * @param[in] output         Output tensor info. The output has the same number of dimensions as
+   *                           the @p input.
+   * @param[in] info           Contains padding and policies to be used in the deconvolution,
+   *                           this is described in @ref PadStrideInfo.
+   * @param[in] invalid_right  The number of zeros added to the right edge of the output.
+   * @param[in] invalid_bottom The number of zeros added to the bottom edge of the output.
+   *
+   * @return a status
+   */
+  static Status validate(const ITensorInfo *input, const ITensorInfo *weights,
+                         const ITensorInfo *bias, const ITensorInfo *output,
+                         const PadStrideInfo &info, unsigned int invalid_right,
+                         unsigned int invalid_bottom);
+
+  // Inherited methods overridden:
+  void run() override;
+  void prepare() override;
+
+private:
+  MemoryGroup _memory_group;
+  NEConvolutionLayer _conv_f;
+  CPPUpsampleEx _upsample_f;
+  CPPFlipWeightsKernel _flip_weights;
+  NEPermute _permute_input;
+  NEPermute _permute_weights;
+  NEPermute _permute_output;
+  Tensor _scaled_output;
+  Tensor _weights_flipped;
+  Tensor _permuted_input;
+  Tensor _permuted_weights;
+  Tensor _permuted_output;
+  bool _is_nchw;
+  const ITensor *_original_weights;
+  ITensor *_input;
+  PadStrideInfo _info;
+  bool _is_prepared;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NETRANSPOSECONVLAYER_H__ */
diff --git a/runtimes/libs/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp b/runtimes/libs/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
new file mode 100644
index 0000000..8ac667c
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+CPPUpsampleKernelEx::CPPUpsampleKernelEx() : _input(nullptr), _output(nullptr), _info() {}
+
+bool CPPUpsampleKernelEx::is_parallelisable() const { return false; }
+
+void CPPUpsampleKernelEx::configure(const ITensor *input, ITensor *output,
+                                    const PadStrideInfo &info)
+{
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+  _input = input;
+  _output = output;
+  _info = info;
+
+  // Configure kernel window
+  Window win = calculate_max_window(*input->info(), Steps());
+
+  // The CPPUpsampleKernelEx doesn't need padding so update_window_and_padding() can be skipped
+  Coordinates coord;
+  coord.set_num_dimensions(output->info()->num_dimensions());
+  output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
+
+  ICPPKernel::configure(win);
+}
+
+void CPPUpsampleKernelEx::run(const Window &window, const ThreadInfo &info)
+{
+  ARM_COMPUTE_UNUSED(info);
+  ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+  ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
+
+  // Initialize the output buffer
+  const int width_scaled = _output->info()->dimension(0);
+  const int height_scaled = _output->info()->dimension(1);
+  const int stride_x = _info.stride().first;
+  const int stride_y = _info.stride().second;
+  const int start_x = _info.pad_left();
+  const int start_y = _info.pad_top();
+  const int end_y = height_scaled - _info.pad_bottom();
+  const int end_x = width_scaled - _info.pad_right();
+  const size_t element_size = _input->info()->element_size();
+
+  // The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset
+  const uint8_t fill_value =
+      _output->info()->data_type() == DataType::QASYMM8
+          ? utility::clamp<uint8_t>(_output->info()->quantization_info().offset)
+          : 0;
+  // Filling a value different than 0 works only for QASYMM8 datatype since we are filling 1byte
+  // values in a buffer of uint8_ts
+  std::fill_n(_output->buffer(), _output->info()->total_size(), fill_value);
+
+  // Create window
+  Window window_out(window);
+  window_out.set(Window::DimX, Window::Dimension(start_x, end_x, stride_x));
+  window_out.set(Window::DimY, Window::Dimension(start_y, end_y, stride_y));
+
+  // Create iterators
+  Iterator in(_input, window);
+  Iterator out(_output, window_out);
+
+  execute_window_loop(
+      window, [&](const Coordinates &) { memcpy(out.ptr(), in.ptr(), element_size); }, in, out);
+}
+} // namespace arm_compute
diff --git a/runtimes/libs/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp b/runtimes/libs/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp
new file mode 100644
index 0000000..f8e0ef8
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/src/runtime/CPP/functions/CPPUpsampleEx.cpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CPP/functions/CPPUpsampleEx.h"
+
+#include "arm_compute/core/CPP/kernels/CPPUpsampleKernelEx.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+void CPPUpsampleEx::configure(const ITensor *input, ITensor *output, const PadStrideInfo &info)
+{
+  auto k = arm_compute::support::cpp14::make_unique<CPPUpsampleKernelEx>();
+  k->configure(input, output, info);
+  _kernel = std::move(k);
+}
diff --git a/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
new file mode 100644
index 0000000..fd15ef0
--- /dev/null
+++ b/runtimes/libs/ARMComputeEx/src/runtime/NEON/functions/NETransposeConvLayer.cpp
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2017-2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NETransposeConvLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/UtilsEx.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/utils/misc/ShapeCalculatorEx.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute::misc::shape_calculator;
+
+namespace arm_compute
+{
+NETransposeConvLayer::NETransposeConvLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
+    : _memory_group(std::move(memory_manager)),
+      _conv_f(),
+      _upsample_f(),
+      _flip_weights(),
+      _permute_input(),
+      _permute_weights(),
+      _permute_output(),
+      _scaled_output(),
+      _weights_flipped(),
+      _permuted_input(),
+      _permuted_weights(),
+      _permuted_output(),
+      _is_nchw(false),
+      _original_weights(nullptr),
+      _input(nullptr),
+      _info(),
+      _is_prepared(false)
+{
+}
+
+Status NETransposeConvLayer::validate(const ITensorInfo *input, const ITensorInfo *weights,
+                                      const ITensorInfo *bias, const ITensorInfo *output,
+                                      const PadStrideInfo &info, unsigned int invalid_right,
+                                      unsigned int invalid_bottom)
+{
+  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
+  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16,
+                                                       DataType::QASYMM8);
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
+  const unsigned int width_idx =
+      get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
+  const unsigned int height_idx =
+      get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
+  ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx));
+  ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1);
+
+  auto out_dims = transposeconv_output_dimensions(
+      input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx),
+      weights->dimension(height_idx), info, invalid_right, invalid_bottom);
+
+  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+  if (is_data_type_quantized_asymmetric(input->data_type()) && bias)
+  {
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
+  }
+  else if (bias)
+  {
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
+  }
+
+  if (output->tensor_shape().total_size() > 0)
+  {
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+
+    const TensorShape output_shape = compute_transposeconv_output_shape(out_dims, *input, *weights);
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) < output_shape.x(),
+                                    "Output's dim 0 is invalid.");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) < output_shape.y(),
+                                    "Output's dim 1 is invalid.");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) < output_shape.z(),
+                                    "Output's dim 2 is invalid.");
+  }
+
+  unsigned int pad_left = 0;
+  unsigned int pad_right = 0;
+  unsigned int pad_top = 0;
+  unsigned int pad_bottom = 0;
+  const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+      *input, *weights, info, out_dims, invalid_right, invalid_bottom, pad_left, pad_right, pad_top,
+      pad_bottom);
+  TensorInfo scale_out_info(
+      input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
+  scale_out_info.set_data_layout(input->data_layout());
+  const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+  const unsigned int batches_idx =
+      get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
+  const unsigned int channel_idx =
+      get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
+  ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) !=
+                              scale_out_info.dimension(batches_idx));
+  ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) !=
+                              scale_out_info.dimension(channel_idx));
+
+  ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output,
+                                                           conv_info, WeightsInfo()));
+
+  return Status{};
+}
+
+void NETransposeConvLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias,
+                                     ITensor *output, const PadStrideInfo &info,
+                                     unsigned int invalid_right, unsigned int invalid_bottom)
+{
+  ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+
+  const DataLayout data_layout = input->info()->data_layout();
+
+  _input = input;
+  _original_weights = weights;
+  _info = info;
+  _is_prepared = false;
+  _is_nchw = data_layout == DataLayout::NCHW;
+
+  const unsigned int stride_x = info.stride().first;
+  const unsigned int stride_y = info.stride().second;
+
+  const unsigned int width_idx =
+      get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+  const unsigned int height_idx =
+      get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+  auto out_dims = transposeconv_output_dimensions(
+      input->info()->dimension(width_idx), input->info()->dimension(height_idx),
+      weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info,
+      invalid_right, invalid_bottom);
+
+  const TensorShape output_shape =
+      compute_transposeconv_output_shape(out_dims, *input->info(), *weights->info());
+  // Output auto initialization if not yet initialized
+  auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(),
+                     input->info()->quantization_info());
+
+  // Perform validation step
+  ARM_COMPUTE_ERROR_THROW_ON(NETransposeConvLayer::validate(
+      input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(),
+      info, invalid_right, invalid_bottom));
+
+  _memory_group.manage(&_scaled_output);
+
+  if (!_is_nchw)
+  {
+    _memory_group.manage(&_permuted_input);
+    _memory_group.manage(&_permuted_weights);
+    _memory_group.manage(&_permuted_output);
+
+    // Configure the function to transform the input tensor from NHWC -> NCHW
+    _permuted_input.info()->set_quantization_info(input->info()->quantization_info());
+    _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
+    _permuted_input.info()->set_data_layout(DataLayout::NCHW);
+
+    // Configure the function to transform the weights tensor from NHWC -> NCHW
+    _permuted_weights.info()->set_quantization_info(weights->info()->quantization_info());
+    _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
+    _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
+
+    // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in
+    // order to match output shape
+
+    unsigned int pad_left = 0;
+    unsigned int pad_right = 0;
+    unsigned int pad_top = 0;
+    unsigned int pad_bottom = 0;
+    const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+        *_permuted_input.info(), *_permuted_weights.info(), info, out_dims, invalid_right,
+        invalid_bottom, pad_left, pad_right, pad_top, pad_bottom);
+
+    TensorInfo scale_out_info(scale_out_shape, 1, _permuted_input.info()->data_type(),
+                              _permuted_input.info()->quantization_info());
+    scale_out_info.set_data_layout(DataLayout::NCHW);
+    _scaled_output.allocator()->init(scale_out_info);
+
+    const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+                                      DimensionRoundingType::CEIL);
+    _upsample_f.configure(&_permuted_input, &_scaled_output, upsample_info);
+
+    _weights_flipped.allocator()->init(*_permuted_weights.info()->clone());
+    _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info());
+    _flip_weights.configure(&_permuted_weights, &_weights_flipped);
+
+    // setup the function to convolve the upscaled output
+    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+    const auto out_shape = output->info()->tensor_shape();
+    TensorShape permuted_out_shape{out_shape[1], out_shape[2], out_shape[0], out_shape[3]};
+    TensorInfo permuted_out_info(permuted_out_shape, 1, output->info()->data_type(),
+                                 output->info()->quantization_info());
+    _permuted_output.allocator()->init(permuted_out_info);
+    _permuted_output.info()->set_data_layout(DataLayout::NCHW);
+    _conv_f.configure(&_scaled_output, &_weights_flipped, bias, &_permuted_output, conv_info);
+
+    // Configure the function to transform the convoluted output to NHWC
+    _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
+
+    _permuted_input.allocator()->allocate();
+    _permuted_weights.allocator()->allocate();
+    _permuted_output.allocator()->allocate();
+  }
+  else
+  {
+    // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in
+    // order to match output shape
+    unsigned int pad_left = 0;
+    unsigned int pad_right = 0;
+    unsigned int pad_top = 0;
+    unsigned int pad_bottom = 0;
+    const TensorShape scale_out_shape = compute_transposeconv_upsampled_shape(
+        *input->info(), *weights->info(), info, out_dims, invalid_right, invalid_bottom, pad_left,
+        pad_right, pad_top, pad_bottom);
+
+    TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(),
+                              input->info()->quantization_info());
+    _scaled_output.allocator()->init(scale_out_info);
+    const PadStrideInfo upsample_info(stride_x, stride_y, pad_left, pad_right, pad_top, pad_bottom,
+                                      DimensionRoundingType::FLOOR);
+    _upsample_f.configure(input, &_scaled_output, upsample_info);
+
+    _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
+    _flip_weights.configure(weights, &_weights_flipped);
+
+    // setup the function to convolve the upscaled output
+    const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+    _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
+  }
+  _scaled_output.allocator()->allocate();
+}
+
+void NETransposeConvLayer::run()
+{
+  prepare();
+
+  // MemoryGroupResourceScope scope_mg(_memory_group);
+
+  // Permute input
+  if (!_is_nchw)
+  {
+    _permute_input.run();
+  }
+
+  _upsample_f.run();
+  _conv_f.run();
+
+  // Permute output
+  if (!_is_nchw)
+  {
+    _permute_output.run();
+  }
+}
+
+void NETransposeConvLayer::prepare()
+{
+  if (!_is_prepared)
+  {
+    ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
+
+    // Run weights flipping and mark original weights tensor as unused
+    _weights_flipped.allocator()->allocate();
+    // Permute weights
+    if (!_is_nchw)
+    {
+      _permute_weights.run();
+    }
+    NEScheduler::get().schedule(&_flip_weights, Window::DimZ);
+    _original_weights->mark_as_unused();
+
+    // Prepare convolution
+    _conv_f.prepare();
+
+    if (!_weights_flipped.is_used())
+    {
+      _weights_flipped.allocator()->free();
+    }
+
+    _is_prepared = true;
+  }
+}
+} // namespace arm_compute
diff --git a/runtimes/neurun/backend/acl_neon/ConstantInitializer.cc b/runtimes/neurun/backend/acl_neon/ConstantInitializer.cc
index 98be80b..c4ff292 100644
--- a/runtimes/neurun/backend/acl_neon/ConstantInitializer.cc
+++ b/runtimes/neurun/backend/acl_neon/ConstantInitializer.cc
@@ -78,6 +78,13 @@ void ConstantInitializer::visit(const model::operation::FullyConnectedNode &node
   registerCopyInitializer(bias_index, bias_obj);
 }
 
+void ConstantInitializer::visit(const model::operation::TransposeConvNode &node)
+{
+  const auto &kernel_index = node.getInputs().at(model::operation::TransposeConvNode::KERNEL);
+  const auto &kernel_obj = _operands.at(kernel_index);
+  registerPermuteInitializer(kernel_index, kernel_obj);
+}
+
 } // namespace acl_neon
 } // namespace backend
 } // namespace neurun
diff --git a/runtimes/neurun/backend/acl_neon/ConstantInitializer.h b/runtimes/neurun/backend/acl_neon/ConstantInitializer.h
index 62e889c..e608c7b 100644
--- a/runtimes/neurun/backend/acl_neon/ConstantInitializer.h
+++ b/runtimes/neurun/backend/acl_neon/ConstantInitializer.h
@@ -41,6 +41,7 @@ public:
   void visit(const model::operation::Conv2DNode &) override;
   void visit(const model::operation::DepthwiseConv2DNode &) override;
   void visit(const model::operation::FullyConnectedNode &) override;
+  void visit(const model::operation::TransposeConvNode &) override;
 
 private:
   const model::Operands &_operands;
diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
index e7d7eda..a4bb098 100644
--- a/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
+++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.cc
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include <arm_compute/runtime/NEON/functions/NETransposeConvLayer.h>
 
 #include "kernel/ConcatLayer.h"
 #include "util/Padding.h"
@@ -836,6 +837,54 @@ void KernelGenerator::visit(const model::operation::StridedSliceNode &node)
   throw std::runtime_error("Not supported, yet");
 }
 
+void KernelGenerator::visit(const model::operation::TransposeConvNode &node)
+{
+  const auto ofm_index{node.getOutputs().at(0)};
+  const auto output_shape_index{
+      node.getInputs().at(model::operation::TransposeConvNode::Input::OUTPUT_SHAPE)};
+  const auto ker_index{node.getInputs().at(model::operation::TransposeConvNode::Input::KERNEL)};
+  const auto ifm_index{node.getInputs().at(model::operation::TransposeConvNode::Input::INPUT)};
+
+  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature();
+  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature();
+  const auto ker_shape = _ctx.at(ker_index).shape().asFeature();
+
+  const auto stride = node.param().stride;
+
+  assert((node.param().padding.type == model::PaddingType::SAME) ||
+         (node.param().padding.type == model::PaddingType::VALID));
+  auto padding = neurun::util::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
+                                                ker_shape.W, ker_shape.H);
+
+  uint32_t invalid_horizontal = 0;
+  uint32_t invalid_vertical = 0;
+  if (node.param().padding.type == model::PaddingType::VALID)
+  {
+    invalid_horizontal =
+        ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
+    invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
+  }
+
+  auto ofm_alloc = _tensor_builder->at(ofm_index).get();
+  auto ifm_alloc = _tensor_builder->at(ifm_index).get();
+  auto ker_alloc = _tensor_builder->at(ker_index).get();
+
+  const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
+
+  std::unique_ptr<::arm_compute::IFunction> fn;
+
+  auto l = nnfw::cpp14::make_unique<::arm_compute::NETransposeConvLayer>();
+
+  l->configure(ifm_alloc->handle(), ker_alloc->handle(), nullptr, ofm_alloc->handle(), tconv_info,
+               invalid_horizontal, invalid_vertical);
+
+  fn = std::move(l);
+
+  auto acl_fn = asAclFunction(std::move(fn));
+
+  _execution_builder->append(std::move(acl_fn));
+}
+
 void KernelGenerator::visit(const model::operation::TransposeNode &node)
 {
   (void)node;
diff --git a/runtimes/neurun/backend/acl_neon/KernelGenerator.h b/runtimes/neurun/backend/acl_neon/KernelGenerator.h
index a823981..429d0fd 100644
--- a/runtimes/neurun/backend/acl_neon/KernelGenerator.h
+++ b/runtimes/neurun/backend/acl_neon/KernelGenerator.h
@@ -59,6 +59,7 @@ public:
   void visit(const model::operation::SquaredDifferenceNode &) override;
   void visit(const model::operation::SubNode &) override;
   void visit(const model::operation::StridedSliceNode &) override;
+  void visit(const model::operation::TransposeConvNode &) override;
   void visit(const model::operation::TransposeNode &) override;
   void visit(const model::operation::AddNode &) override;
   void visit(const model::operation::DivNode &) override;
diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
index c3d158e..2afc3de 100644
--- a/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
+++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.cc
@@ -195,6 +195,8 @@ void ShapeFixer::visit(const model::operation::SubNode &node)
   }
 }
 
+void ShapeFixer::visit(const model::operation::TransposeConvNode &) { /* DO NOTHING */ }
+
 void ShapeFixer::visit(const model::operation::AddNode &node)
 {
   const auto lhs_index{node.getInputs().at(model::operation::AddNode::Input::LHS)};
diff --git a/runtimes/neurun/backend/acl_neon/ShapeFixer.h b/runtimes/neurun/backend/acl_neon/ShapeFixer.h
index e038d37..60f7a2e 100644
--- a/runtimes/neurun/backend/acl_neon/ShapeFixer.h
+++ b/runtimes/neurun/backend/acl_neon/ShapeFixer.h
@@ -57,6 +57,7 @@ public:
   void visit(const model::operation::SQRTNode &) override;
   void visit(const model::operation::SquaredDifferenceNode &) override;
   void visit(const model::operation::SubNode &) override;
+  void visit(const model::operation::TransposeConvNode &) override;
   void visit(const model::operation::AddNode &) override;
   void visit(const model::operation::DivNode &) override;
   void visit(const model::operation::ComparisonNode &) override;
diff --git a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
index fe4625d..5678c7d 100644
--- a/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
+++ b/tests/nnapi/nnapi_gtest.skip.armv7l-linux.acl_neon
@@ -62,7 +62,6 @@ GeneratedTests.reduce_sum_ex*
 GeneratedTests.topk_v2*
 # Unexpected result
 GeneratedTests.split*
-GeneratedTests.transpose_conv*
 GeneratedTests.pack*
 GeneratedTests.unpack*
 generatedtests.logical_not_ex*
diff --git a/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt b/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt
index 2d3dd19..7dda80a 100644
--- a/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt
+++ b/tests/scripts/neurun_frameworktest_list.armv7l.acl_neon.txt
@@ -12,5 +12,6 @@ reshape
 softmax
 sqrt
 tanh
+transpose_conv
 MODELS/inception_module
 MODELS/mobilenet
-- 
2.7.4
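
Appendix (not part of the patch): a minimal standalone sketch of the shape arithmetic this change relies on -- the transposed-convolution output-size relation documented in NETransposeConvLayer.h, and the invalid-region computation that KernelGenerator.cc performs for VALID padding. The function and variable names below are illustrative only, not taken from the runtime.

// sketch.cpp -- illustrative only; compile with any C++11 compiler (e.g. g++ sketch.cpp)
#include <cassert>
#include <cstdio>

// width_output = (width_input - 1) * stride_x - 2 * padding_x + kernel_x
// (the same formula applies to the height dimension)
static unsigned transposeconv_out_dim(unsigned in, unsigned kernel, unsigned stride, unsigned pad)
{
  return (in - 1) * stride - 2 * pad + kernel;
}

int main()
{
  // Example: 2x2 input, 3x3 kernel, stride 2, no padding -> 5x5 output.
  const unsigned ifm_w = 2, ker_w = 3, stride_w = 2, pad_w = 0;
  const unsigned ofm_w = transposeconv_out_dim(ifm_w, ker_w, stride_w, pad_w);
  assert(ofm_w == 5);

  // For VALID padding, the kernel generator trims the zeros that upsampling
  // would otherwise leave past the last input sample on the right/bottom edge:
  //   invalid_horizontal = ofm_W - (1 + (ifm_W - 1) * stride) - (ker_W - 1)
  const unsigned invalid_horizontal = ofm_w - (1 + (ifm_w - 1) * stride_w) - (ker_w - 1);

  std::printf("ofm_w = %u, invalid_horizontal = %u\n", ofm_w, invalid_horizontal);
  return 0;
}

With these numbers the upsample step inserts stride - 1 zeros between input samples, and the stride-1 convolution against the flipped weights then produces the 5x5 result, which is why the layer is implemented as CPPUpsampleEx followed by NEConvolutionLayer.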