[kbenchmark] Add TransposeConv kernel (#5546)
author윤지영/On-Device Lab(SR)/Staff Engineer/삼성전자 <jy910.yun@samsung.com>
Thu, 4 Jul 2019 10:08:40 +0000 (19:08 +0900)
committer이한종/On-Device Lab(SR)/Engineer/삼성전자 <hanjoung.lee@samsung.com>
Thu, 4 Jul 2019 10:08:40 +0000 (19:08 +0900)
This patch adds ACL CL and NEON TransposeConv kernel test sets.
It also tests CLDeconvolutionLayerEx.

Signed-off-by: Jiyoung Yun <jy910.yun@samsung.com>
tools/kbenchmark/OperationLoader.h
tools/kbenchmark/Operations.lst
tools/kbenchmark/kernels/acl_common/Utils.h [new file with mode: 0644]
tools/kbenchmark/kernels/armcl/CMakeLists.txt
tools/kbenchmark/kernels/armcl/TransposeConv.cpp [new file with mode: 0644]
tools/kbenchmark/kernels/armne/CMakeLists.txt
tools/kbenchmark/kernels/armne/TransposeConv.cpp [new file with mode: 0644]
tools/kbenchmark/operations/TransposeConv.h [new file with mode: 0644]

index 701033d..3659c30 100644 (file)
@@ -22,6 +22,7 @@
 
 #include "Operation.h"
 #include "operations/Convolution.h"
+#include "operations/TransposeConv.h"
 
 namespace kbenchmark
 {
index 1ebdab1..6e51af6 100644 (file)
@@ -20,3 +20,4 @@
 
 // Config Name        Operation Name
 OP("CONV_2D",         Convolution)
+OP("TRANSPOSE_CONV",  TransposeConv)
diff --git a/tools/kbenchmark/kernels/acl_common/Utils.h b/tools/kbenchmark/kernels/acl_common/Utils.h
new file mode 100644 (file)
index 0000000..2f3648d
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KBENCHMARK_KERNELS_ACL_COMMON_UTILS_H__
+#define __KBENCHMARK_KERNELS_ACL_COMMON_UTILS_H__
+
+#include <algorithm>
+#include <cstdint>
+#include <stdexcept>
+#include <string>
+
+using namespace arm_compute;
+
+namespace kbenchmark
+{
+namespace kernels
+{
+namespace acl_common
+{
+
+// Explicit per-side padding (in cells) around a 2-D feature map.
+struct PaddingInfo
+{
+  uint32_t top;
+  uint32_t bottom;
+  uint32_t left;
+  uint32_t right;
+};
+
+PaddingInfo calculatePadding(const std::string &padding_name, const uint32_t ifm_H,
+                             const uint32_t ifm_W, const uint32_t ofm_H, const uint32_t ofm_W,
+                             const uint32_t vertical_stride, const uint32_t horizontal_stride,
+                             const uint32_t ker_H, const uint32_t ker_W)
+{
+  uint32_t top;
+  uint32_t bottom;
+  uint32_t left;
+  uint32_t right;
+
+  if (padding_name == "VALID")
+  {
+    top = bottom = left = right = 0;
+  }
+  else if (padding_name == "SAME")
+  {
+    const int32_t vertical_needed_input = (ofm_H - 1) * vertical_stride + ker_H;
+    const int32_t vertical_total_padding = std::max(0, vertical_needed_input - (int32_t)ifm_H);
+
+    const int32_t horizontal_needed_input = (ofm_W - 1) * horizontal_stride + ker_W;
+    const int32_t horizontal_total_padding = std::max(0, horizontal_needed_input - (int32_t)ifm_W);
+
+    top = vertical_total_padding / 2;
+    bottom = (vertical_total_padding + 1) / 2;
+    left = horizontal_total_padding / 2;
+    right = (horizontal_total_padding + 1) / 2;
+  }
+
+  return PaddingInfo{top, bottom, left, right};
+}
+
+PadStrideInfo asPadStrideInfo(const PaddingInfo &padding, uint32_t vertical_stride,
+                              uint32_t horizontal_stride)
+{
+  return PadStrideInfo{horizontal_stride,
+                       vertical_stride,
+                       padding.left,
+                       padding.right,
+                       padding.top,
+                       padding.bottom,
+                       DimensionRoundingType::FLOOR};
+}
+
+// Map an activation name from the benchmark description to an ACL
+// ActivationLayerInfo. Only "NONE" and "RELU" are recognized.
+// `inline` because this function is defined in a header (ODR).
+inline ActivationLayerInfo asActivationLayerInfo(const std::string &act_name)
+{
+  if (act_name == "NONE")
+  {
+    return ActivationLayerInfo{};
+  }
+  else if (act_name == "RELU")
+  {
+    return ActivationLayerInfo{ActivationLayerInfo::ActivationFunction::RELU};
+  }
+  else
+  {
+    // Include the offending name so failures are actionable.
+    throw std::runtime_error{"Unsupported activation layer info: " + act_name};
+  }
+}
+
+} // namespace acl_common
+} // namespace kernels
+} // namespace kbenchmark
+
+#endif // __KBENCHMARK_KERNELS_ACL_COMMON_UTILS_H__
index 0fe3150..752d4e3 100644 (file)
@@ -7,10 +7,13 @@ function(add_kben_armcl_library)
   cmake_parse_arguments(ARG "" "NAME" "SOURCES" ${ARGN})
 
   add_library(${ARG_NAME} SHARED ${ARG_SOURCES})
+  target_include_directories(${ARG_NAME} PUBLIC ../acl_common)
   target_link_libraries(${ARG_NAME} nonius)
   target_link_libraries(${ARG_NAME} arm_compute)
+  target_link_libraries(${ARG_NAME} arm_compute_ex)
   target_link_libraries(${ARG_NAME} pthread)
   install(TARGETS ${ARG_NAME} DESTINATION lib/kben)
 endfunction(add_kben_armcl_library)
 
-add_kben_armcl_library(NAME kben_armcl_conv SOURCES Convolution.cpp)
\ No newline at end of file
+add_kben_armcl_library(NAME kben_armcl_conv SOURCES Convolution.cpp)
+add_kben_armcl_library(NAME kben_armcl_transpose_conv SOURCES TransposeConv.cpp)
diff --git a/tools/kbenchmark/kernels/armcl/TransposeConv.cpp b/tools/kbenchmark/kernels/armcl/TransposeConv.cpp
new file mode 100644 (file)
index 0000000..2c72ecd
--- /dev/null
@@ -0,0 +1,393 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file TransposeConv benchmark with various algorithms
+ */
+
+#include <nonius/nonius.h++>
+
+#include <arm_compute/core/Types.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include <arm_compute/runtime/CL/CLFunctionsEx.h>
+
+#include <cstdint>
+#include <cassert>
+#include <stdexcept>
+
+#include "Utils.h"
+
+using namespace arm_compute;
+using namespace kbenchmark::kernels::acl_common;
+
+//
+// Helpers
+//
+namespace
+{
+
+// Tensor data layouts exercised by the benchmarks below.
+enum Layout
+{
+  NCHW,
+  NHWC
+};
+
+// Initializes the CL scheduler once at shared-object load time so the
+// OpenCL context setup cost is neither repeated nor measured per benchmark.
+struct Initializer
+{
+  Initializer() { CLScheduler::get().default_init(); }
+};
+
+Initializer initializer;
+
+// 1-D float32 tensor info (used for the bias tensor).
+TensorInfo make_info(uint32_t N)
+{
+  TensorShape shape{N};
+  return TensorInfo{shape, 1, DataType::F32};
+}
+
+// 4-D float32 tensor info for a given data layout. The TensorShape is given
+// as {W, H, C, N} for NCHW and {C, W, H, N} for NHWC to match ACL's
+// dimension ordering.
+template <enum Layout> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W);
+
+template <> TensorInfo make_info<NCHW>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
+{
+  TensorShape shape{W, H, C, N};
+  TensorInfo info{shape, 1, DataType::F32};
+  info.set_data_layout(DataLayout::NCHW);
+  return info;
+}
+
+template <> TensorInfo make_info<NHWC>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
+{
+  TensorShape shape{C, W, H, N};
+  TensorInfo info{shape, 1, DataType::F32};
+  info.set_data_layout(DataLayout::NHWC);
+  return info;
+}
+
+// Log a failed ACL validation and abort the benchmark body via exception;
+// the catch blocks below turn this into a no-op measurement.
+// NOTE(review): std::cerr relies on a transitive include for <iostream> —
+// consider including it explicitly.
+inline void check(const Status &status)
+{
+  if (!status)
+  {
+    std::cerr << status.error_description() << std::endl;
+    throw std::runtime_error{"ERROR"};
+  }
+}
+
+// NOTE(review): unused in this file; presumably kept for parity with the
+// Convolution kernel benchmarks.
+inline bool is_odd(uint32_t n) { return (n % 2 != 0) ? true : false; }
+
+} // namespace
+
+//
+// Benchmark Parameters
+//
+NONIUS_PARAM(BATCH, 1);
+
+NONIUS_PARAM(IFM_C, 3);
+NONIUS_PARAM(IFM_H, 244);
+NONIUS_PARAM(IFM_W, 244);
+
+NONIUS_PARAM(OFM_C, 3);
+NONIUS_PARAM(OFM_H, 244);
+NONIUS_PARAM(OFM_W, 244);
+
+NONIUS_PARAM(KER_H, 3);
+NONIUS_PARAM(KER_W, 3);
+
+NONIUS_PARAM(STRIDE_H, 1);
+NONIUS_PARAM(STRIDE_W, 1);
+
+NONIUS_PARAM(PADDING, std::string{"SAME"})
+
+//
+// Configuration Helpers
+//
+namespace
+{
+
+// Gathers all TransposeConv benchmark parameters from the nonius
+// chronometer and derives the ACL deconvolution configuration
+// (PadStrideInfo plus inner border offsets).
+struct Configuration
+{
+  uint32_t ifm_N;
+  uint32_t ifm_C;
+  uint32_t ifm_H;
+  uint32_t ifm_W;
+
+  uint32_t ofm_N;
+  uint32_t ofm_C;
+  uint32_t ofm_H;
+  uint32_t ofm_W;
+
+  uint32_t ker_N;
+  uint32_t ker_C;
+  uint32_t ker_H;
+  uint32_t ker_W;
+
+  uint32_t vertical_stride;
+  uint32_t horizontal_stride;
+
+  PadStrideInfo deconv_info;
+
+  uint32_t inner_border_right;
+  uint32_t inner_border_top;
+
+  Configuration(nonius::chronometer meter)
+  {
+    ifm_N = meter.param<BATCH>();
+    ifm_C = meter.param<IFM_C>();
+    ifm_H = meter.param<IFM_H>();
+    ifm_W = meter.param<IFM_W>();
+
+    ofm_N = meter.param<BATCH>();
+    ofm_C = meter.param<OFM_C>();
+    ofm_H = meter.param<OFM_H>();
+    ofm_W = meter.param<OFM_W>();
+
+    // Kernel has one filter per output channel, each spanning all input
+    // channels.
+    ker_N = meter.param<OFM_C>();
+    ker_C = meter.param<IFM_C>();
+    ker_H = meter.param<KER_H>();
+    ker_W = meter.param<KER_W>();
+
+    vertical_stride = meter.param<STRIDE_H>();
+    horizontal_stride = meter.param<STRIDE_W>();
+
+    // NOTE The padding calculation formula of TransposeConv is opposite to Conv.
+    //      So the location of ifm and ofm is changed.
+    auto padding_info = calculatePadding(meter.param<PADDING>(), ofm_H, ofm_W, ifm_H, ifm_W,
+                                         vertical_stride, horizontal_stride, ker_H, ker_W);
+
+    // The asymmetric part of the padding (extra cell at bottom/right) is
+    // expressed as ACL "inner border" offsets; never underflows since
+    // right >= left and bottom >= top by construction.
+    inner_border_right = padding_info.right - padding_info.left;
+    inner_border_top = padding_info.bottom - padding_info.top;
+
+    // Symmetrize the padding using the larger side before handing it to ACL.
+    padding_info.left = padding_info.right;
+    padding_info.top = padding_info.bottom;
+
+    deconv_info = asPadStrideInfo(padding_info, vertical_stride, horizontal_stride);
+  }
+
+  // TensorInfo factories for the source/destination/kernel tensors in the
+  // requested layout, plus a 1-D bias.
+  template <Layout L> TensorInfo src_info() const
+  {
+    return make_info<L>(ifm_N, ifm_C, ifm_H, ifm_W);
+  }
+  template <Layout L> TensorInfo dst_info() const
+  {
+    return make_info<L>(ofm_N, ofm_C, ofm_H, ofm_W);
+  }
+  template <Layout L> TensorInfo ker_info() const
+  {
+    return make_info<L>(ker_N, ker_C, ker_H, ker_W);
+  }
+  TensorInfo bias_info(void) const { return make_info(ker_N); }
+};
+
+} // namespace
+
+//
+// Benchmark Implementations
+//
+namespace
+{
+
+// Registry local to this shared object; exported through benchmark_functions()
+// below so the kbenchmark driver can enumerate the benchmarks (presumably
+// after dlopen — verify against the loader).
+inline nonius::benchmark_registry &local_benchmark_registry()
+{
+  static nonius::benchmark_registry registry;
+  return registry;
+}
+
+} // namespace
+
+// Like NONIUS_BENCHMARK, but registers into the file-local registry above so
+// the benchmarks stay private to this shared object.
+#define NONIUS_LOCAL_BENCHMARK(name, ...)                                              \
+  namespace                                                                            \
+  {                                                                                    \
+  static ::nonius::benchmark_registrar                                                 \
+      NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \
+                                                     __VA_ARGS__);                     \
+  }
+
+// Benchmark stock ACL CLDeconvolutionLayer with NCHW tensors (no bias).
+NONIUS_LOCAL_BENCHMARK("CLDeconvolutionLayer_NCHW", [](nonius::chronometer meter) {
+  CLDeconvolutionLayer deconv;
+
+  // Configure
+  Configuration p{meter};
+
+  CLTensor src_tensor{};
+  CLTensor dst_tensor{};
+  CLTensor ker_tensor{};
+
+  src_tensor.allocator()->init(p.src_info<NCHW>());
+  dst_tensor.allocator()->init(p.dst_info<NCHW>());
+  ker_tensor.allocator()->init(p.ker_info<NCHW>());
+
+  try
+  {
+    check(deconv.validate(src_tensor.info(), ker_tensor.info(), nullptr, dst_tensor.info(),
+                          p.deconv_info, p.inner_border_right, p.inner_border_top));
+  }
+  catch (...)
+  {
+    // Unsupported configuration: nonius still needs a measurement, so record
+    // a trivial no-op and skip the real run.
+    meter.measure([&](int) {
+      // DO NOTHING
+      volatile int x = 0;
+      return x;
+    });
+    return;
+  }
+
+  deconv.configure(&src_tensor, &ker_tensor, nullptr, &dst_tensor, p.deconv_info,
+                   p.inner_border_right, p.inner_border_top);
+
+  src_tensor.allocator()->allocate();
+  ker_tensor.allocator()->allocate();
+  dst_tensor.allocator()->allocate();
+
+  // Run! Sync so queued CL work is included in the measurement.
+  meter.measure([&](int) {
+    deconv.run();
+    CLScheduler::get().sync();
+  });
+})
+
+// Same flow as CLDeconvolutionLayer_NCHW above, but with NHWC tensors.
+NONIUS_LOCAL_BENCHMARK("CLDeconvolutionLayer_NHWC", [](nonius::chronometer meter) {
+  CLDeconvolutionLayer deconv;
+
+  // Configure
+  Configuration p{meter};
+
+  CLTensor src_tensor{};
+  CLTensor dst_tensor{};
+  CLTensor ker_tensor{};
+
+  src_tensor.allocator()->init(p.src_info<NHWC>());
+  dst_tensor.allocator()->init(p.dst_info<NHWC>());
+  ker_tensor.allocator()->init(p.ker_info<NHWC>());
+
+  try
+  {
+    check(deconv.validate(src_tensor.info(), ker_tensor.info(), nullptr, dst_tensor.info(),
+                          p.deconv_info, p.inner_border_right, p.inner_border_top));
+  }
+  catch (...)
+  {
+    meter.measure([&](int) {
+      // DO NOTHING
+      volatile int x = 0;
+      return x;
+    });
+    return;
+  }
+
+  deconv.configure(&src_tensor, &ker_tensor, nullptr, &dst_tensor, p.deconv_info,
+                   p.inner_border_right, p.inner_border_top);
+
+  src_tensor.allocator()->allocate();
+  ker_tensor.allocator()->allocate();
+  dst_tensor.allocator()->allocate();
+
+  // Run!
+  meter.measure([&](int) {
+    deconv.run();
+    CLScheduler::get().sync();
+  });
+})
+
+// Same flow as CLDeconvolutionLayer_NCHW, but exercising the extended
+// CLDeconvolutionLayerEx implementation (from arm_compute_ex).
+NONIUS_LOCAL_BENCHMARK("CLDeconvolutionLayerEx_NCHW", [](nonius::chronometer meter) {
+  CLDeconvolutionLayerEx deconv;
+
+  // Configure
+  Configuration p{meter};
+
+  CLTensor src_tensor{};
+  CLTensor dst_tensor{};
+  CLTensor ker_tensor{};
+
+  src_tensor.allocator()->init(p.src_info<NCHW>());
+  dst_tensor.allocator()->init(p.dst_info<NCHW>());
+  ker_tensor.allocator()->init(p.ker_info<NCHW>());
+
+  try
+  {
+    check(deconv.validate(src_tensor.info(), ker_tensor.info(), nullptr, dst_tensor.info(),
+                          p.deconv_info, p.inner_border_right, p.inner_border_top));
+  }
+  catch (...)
+  {
+    meter.measure([&](int) {
+      // DO NOTHING
+      volatile int x = 0;
+      return x;
+    });
+    return;
+  }
+
+  deconv.configure(&src_tensor, &ker_tensor, nullptr, &dst_tensor, p.deconv_info,
+                   p.inner_border_right, p.inner_border_top);
+
+  src_tensor.allocator()->allocate();
+  ker_tensor.allocator()->allocate();
+  dst_tensor.allocator()->allocate();
+
+  // Run!
+  meter.measure([&](int) {
+    deconv.run();
+    CLScheduler::get().sync();
+  });
+})
+
+// Same flow as CLDeconvolutionLayerEx_NCHW above, but with NHWC tensors.
+NONIUS_LOCAL_BENCHMARK("CLDeconvolutionLayerEx_NHWC", [](nonius::chronometer meter) {
+  CLDeconvolutionLayerEx deconv;
+
+  // Configure
+  Configuration p{meter};
+
+  CLTensor src_tensor{};
+  CLTensor dst_tensor{};
+  CLTensor ker_tensor{};
+
+  src_tensor.allocator()->init(p.src_info<NHWC>());
+  dst_tensor.allocator()->init(p.dst_info<NHWC>());
+  ker_tensor.allocator()->init(p.ker_info<NHWC>());
+
+  try
+  {
+    check(deconv.validate(src_tensor.info(), ker_tensor.info(), nullptr, dst_tensor.info(),
+                          p.deconv_info, p.inner_border_right, p.inner_border_top));
+  }
+  catch (...)
+  {
+    meter.measure([&](int) {
+      // DO NOTHING
+      volatile int x = 0;
+      return x;
+    });
+    return;
+  }
+
+  deconv.configure(&src_tensor, &ker_tensor, nullptr, &dst_tensor, p.deconv_info,
+                   p.inner_border_right, p.inner_border_top);
+
+  src_tensor.allocator()->allocate();
+  ker_tensor.allocator()->allocate();
+  dst_tensor.allocator()->allocate();
+
+  // Run!
+  meter.measure([&](int) {
+    deconv.run();
+    CLScheduler::get().sync();
+  });
+})
+
+extern "C" nonius::benchmark_registry &benchmark_functions(void)
+{
+  return local_benchmark_registry();
+}
index bb38702..d267c36 100644 (file)
@@ -7,10 +7,12 @@ function(add_kben_armne_library)
   cmake_parse_arguments(ARG "" "NAME" "SOURCES" ${ARGN})
 
   add_library(${ARG_NAME} SHARED ${ARG_SOURCES})
+  target_include_directories(${ARG_NAME} PUBLIC ../acl_common)
   target_link_libraries(${ARG_NAME} nonius)
   target_link_libraries(${ARG_NAME} arm_compute)
   target_link_libraries(${ARG_NAME} pthread)
   install(TARGETS ${ARG_NAME} DESTINATION lib/kben)
 endfunction(add_kben_armne_library)
 
-add_kben_armne_library(NAME kben_armne_conv SOURCES Convolution.cpp)
\ No newline at end of file
+add_kben_armne_library(NAME kben_armne_conv SOURCES Convolution.cpp)
+add_kben_armne_library(NAME kben_armne_transpose_conv SOURCES TransposeConv.cpp)
diff --git a/tools/kbenchmark/kernels/armne/TransposeConv.cpp b/tools/kbenchmark/kernels/armne/TransposeConv.cpp
new file mode 100644 (file)
index 0000000..bb61b86
--- /dev/null
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file TransposeConv benchmark with various algorithms
+ */
+
+#include <nonius/nonius.h++>
+
+#include <arm_compute/core/Types.h>
+#include <arm_compute/runtime/NEON/NEScheduler.h>
+#include <arm_compute/runtime/NEON/NEFunctions.h>
+
+#include <cstdint>
+#include <cassert>
+#include <stdexcept>
+
+#include "Utils.h"
+
+using namespace arm_compute;
+using namespace kbenchmark::kernels::acl_common;
+
+//
+// Helpers
+//
+namespace
+{
+
+// Helpers mirroring the armcl TransposeConv benchmark (NEON needs no
+// scheduler initializer, hence no Initializer struct here).
+enum Layout
+{
+  NCHW,
+  NHWC
+};
+
+// 1-D float32 tensor info (used for the bias tensor).
+TensorInfo make_info(uint32_t N)
+{
+  TensorShape shape{N};
+  return TensorInfo{shape, 1, DataType::F32};
+}
+
+// 4-D float32 tensor info for a given data layout ({W,H,C,N} for NCHW,
+// {C,W,H,N} for NHWC, matching ACL's dimension ordering).
+template <enum Layout> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W);
+
+template <> TensorInfo make_info<NCHW>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
+{
+  TensorShape shape{W, H, C, N};
+  TensorInfo info{shape, 1, DataType::F32};
+  info.set_data_layout(DataLayout::NCHW);
+  return info;
+}
+
+template <> TensorInfo make_info<NHWC>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
+{
+  TensorShape shape{C, W, H, N};
+  TensorInfo info{shape, 1, DataType::F32};
+  info.set_data_layout(DataLayout::NHWC);
+  return info;
+}
+
+// Log a failed ACL validation and abort the benchmark body via exception;
+// the catch blocks below turn this into a no-op measurement.
+// NOTE(review): std::cerr relies on a transitive include for <iostream>.
+inline void check(const Status &status)
+{
+  if (!status)
+  {
+    std::cerr << status.error_description() << std::endl;
+    throw std::runtime_error{"ERROR"};
+  }
+}
+
+// NOTE(review): unused in this file; presumably kept for parity with the
+// Convolution kernel benchmarks.
+inline bool is_odd(uint32_t n) { return (n % 2 != 0) ? true : false; }
+
+} // namespace
+
+//
+// Benchmark Parameters
+//
+NONIUS_PARAM(BATCH, 1);
+
+NONIUS_PARAM(IFM_C, 3);
+NONIUS_PARAM(IFM_H, 244);
+NONIUS_PARAM(IFM_W, 244);
+
+NONIUS_PARAM(OFM_C, 3);
+NONIUS_PARAM(OFM_H, 244);
+NONIUS_PARAM(OFM_W, 244);
+
+NONIUS_PARAM(KER_H, 3);
+NONIUS_PARAM(KER_W, 3);
+
+NONIUS_PARAM(STRIDE_H, 1);
+NONIUS_PARAM(STRIDE_W, 1);
+
+NONIUS_PARAM(PADDING, std::string{"SAME"})
+
+//
+// Configuration Helpers
+//
+namespace
+{
+
+// Gathers all TransposeConv benchmark parameters from the nonius
+// chronometer and derives the ACL deconvolution configuration
+// (identical to the armcl Configuration helper).
+struct Configuration
+{
+  uint32_t ifm_N;
+  uint32_t ifm_C;
+  uint32_t ifm_H;
+  uint32_t ifm_W;
+
+  uint32_t ofm_N;
+  uint32_t ofm_C;
+  uint32_t ofm_H;
+  uint32_t ofm_W;
+
+  uint32_t ker_N;
+  uint32_t ker_C;
+  uint32_t ker_H;
+  uint32_t ker_W;
+
+  uint32_t vertical_stride;
+  uint32_t horizontal_stride;
+
+  PadStrideInfo deconv_info;
+
+  uint32_t inner_border_right;
+  uint32_t inner_border_top;
+
+  Configuration(nonius::chronometer meter)
+  {
+    ifm_N = meter.param<BATCH>();
+    ifm_C = meter.param<IFM_C>();
+    ifm_H = meter.param<IFM_H>();
+    ifm_W = meter.param<IFM_W>();
+
+    ofm_N = meter.param<BATCH>();
+    ofm_C = meter.param<OFM_C>();
+    ofm_H = meter.param<OFM_H>();
+    ofm_W = meter.param<OFM_W>();
+
+    // One filter per output channel, each spanning all input channels.
+    ker_N = meter.param<OFM_C>();
+    ker_C = meter.param<IFM_C>();
+    ker_H = meter.param<KER_H>();
+    ker_W = meter.param<KER_W>();
+
+    vertical_stride = meter.param<STRIDE_H>();
+    horizontal_stride = meter.param<STRIDE_W>();
+
+    // NOTE The padding calculation formula of TransposeConv is opposite to Conv.
+    //      So the location of ifm and ofm is changed.
+    auto padding_info = calculatePadding(meter.param<PADDING>(), ofm_H, ofm_W, ifm_H, ifm_W,
+                                         vertical_stride, horizontal_stride, ker_H, ker_W);
+
+    // Asymmetric padding becomes ACL "inner border" offsets; padding is then
+    // symmetrized with the larger (bottom/right) side.
+    inner_border_right = padding_info.right - padding_info.left;
+    inner_border_top = padding_info.bottom - padding_info.top;
+
+    padding_info.left = padding_info.right;
+    padding_info.top = padding_info.bottom;
+
+    deconv_info = asPadStrideInfo(padding_info, vertical_stride, horizontal_stride);
+  }
+
+  // TensorInfo factories for source/destination/kernel tensors plus bias.
+  template <Layout L> TensorInfo src_info() const
+  {
+    return make_info<L>(ifm_N, ifm_C, ifm_H, ifm_W);
+  }
+  template <Layout L> TensorInfo dst_info() const
+  {
+    return make_info<L>(ofm_N, ofm_C, ofm_H, ofm_W);
+  }
+  template <Layout L> TensorInfo ker_info() const
+  {
+    return make_info<L>(ker_N, ker_C, ker_H, ker_W);
+  }
+  TensorInfo bias_info(void) const { return make_info(ker_N); }
+};
+
+} // namespace
+
+//
+// Benchmark Implementations
+//
+namespace
+{
+
+inline nonius::benchmark_registry &local_benchmark_registry()
+{
+  static nonius::benchmark_registry registry;
+  return registry;
+}
+
+} // namespace
+
+#define NONIUS_LOCAL_BENCHMARK(name, ...)                                              \
+  namespace                                                                            \
+  {                                                                                    \
+  static ::nonius::benchmark_registrar                                                 \
+      NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \
+                                                     __VA_ARGS__);                     \
+  }
+
+// Benchmark NEON NEDeconvolutionLayer with NCHW tensors (no bias).
+// Unlike the CL variant, run() is synchronous, so no explicit sync is needed.
+NONIUS_LOCAL_BENCHMARK("NEDeconvolutionLayer_NCHW", [](nonius::chronometer meter) {
+  NEDeconvolutionLayer deconv;
+
+  // Configure
+  Configuration p{meter};
+
+  Tensor src_tensor{};
+  Tensor dst_tensor{};
+  Tensor ker_tensor{};
+
+  src_tensor.allocator()->init(p.src_info<NCHW>());
+  dst_tensor.allocator()->init(p.dst_info<NCHW>());
+  ker_tensor.allocator()->init(p.ker_info<NCHW>());
+
+  try
+  {
+    check(deconv.validate(src_tensor.info(), ker_tensor.info(), nullptr, dst_tensor.info(),
+                          p.deconv_info, p.inner_border_right, p.inner_border_top));
+  }
+  catch (...)
+  {
+    // Unsupported configuration: record a trivial no-op measurement and skip.
+    meter.measure([&](int) {
+      // DO NOTHING
+      volatile int x = 0;
+      return x;
+    });
+    return;
+  }
+
+  deconv.configure(&src_tensor, &ker_tensor, nullptr, &dst_tensor, p.deconv_info,
+                   p.inner_border_right, p.inner_border_top);
+
+  src_tensor.allocator()->allocate();
+  ker_tensor.allocator()->allocate();
+  dst_tensor.allocator()->allocate();
+
+  // Run!
+  meter.measure([&](int) { deconv.run(); });
+})
+
+// Same flow as NEDeconvolutionLayer_NCHW above, but with NHWC tensors.
+NONIUS_LOCAL_BENCHMARK("NEDeconvolutionLayer_NHWC", [](nonius::chronometer meter) {
+  NEDeconvolutionLayer deconv;
+
+  // Configure
+  Configuration p{meter};
+
+  Tensor src_tensor{};
+  Tensor dst_tensor{};
+  Tensor ker_tensor{};
+
+  src_tensor.allocator()->init(p.src_info<NHWC>());
+  dst_tensor.allocator()->init(p.dst_info<NHWC>());
+  ker_tensor.allocator()->init(p.ker_info<NHWC>());
+
+  try
+  {
+    check(deconv.validate(src_tensor.info(), ker_tensor.info(), nullptr, dst_tensor.info(),
+                          p.deconv_info, p.inner_border_right, p.inner_border_top));
+  }
+  catch (...)
+  {
+    meter.measure([&](int) {
+      // DO NOTHING
+      volatile int x = 0;
+      return x;
+    });
+    return;
+  }
+
+  deconv.configure(&src_tensor, &ker_tensor, nullptr, &dst_tensor, p.deconv_info,
+                   p.inner_border_right, p.inner_border_top);
+
+  src_tensor.allocator()->allocate();
+  ker_tensor.allocator()->allocate();
+  dst_tensor.allocator()->allocate();
+
+  // Run!
+  meter.measure([&](int) { deconv.run(); });
+})
+
+extern "C" nonius::benchmark_registry &benchmark_functions(void)
+{
+  return local_benchmark_registry();
+}
diff --git a/tools/kbenchmark/operations/TransposeConv.h b/tools/kbenchmark/operations/TransposeConv.h
new file mode 100644 (file)
index 0000000..69ab307
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __KBENCHMARK_OPERATIONS_TRANSPOSE_CONV_H__
+#define __KBENCHMARK_OPERATIONS_TRANSPOSE_CONV_H__
+
+#include "Operation.h"
+#include "Utils.h"
+
+namespace kbenchmark
+{
+namespace operation
+{
+
+// Translates a recorded TRANSPOSE_CONV layer description into the nonius
+// parameter set consumed by the armcl/armne TransposeConv kernel benchmarks.
+class TransposeConv final : public Operation
+{
+public:
+  TransposeConv() = default;
+
+  // @brief  Build benchmark parameters for one layer.
+  // @param  layer_num  index of the layer within the model
+  // @param  info       key/value description of the operation's tensors
+  // @return nonius parameters keyed by the NONIUS_PARAM names of the kernels
+  nonius::parameters params(int layer_num, OperationInfo &info) override
+  {
+    nonius::parameters params;
+
+    params.insert({"LAYER", nonius::param{layer_num}});
+
+    params.insert({"BATCH", nonius::param{1}});
+
+    // TODO The output_shape will be used in Dynamic tensor case
+    // input0 holds the output-shape tensor of TRANSPOSE_CONV.
+    auto _output_shape = get_key_dims({"input0"}, info);
+
+    // input1: weights — dims [1]/[2] are taken as kernel H/W
+    // (assumes an {N,H,W,C}-style dim order — TODO confirm against recorder).
+    auto _weights = get_key_dims({"input1"}, info);
+    params.insert({"KER_H", nonius::param{_weights[1]}});
+    params.insert({"KER_W", nonius::param{_weights[2]}});
+
+    // input2: input feature map — dims [1]/[2]/[3] read as H/W/C.
+    auto _input = get_key_dims({"input2"}, info);
+    params.insert({"IFM_H", nonius::param{_input[1]}});
+    params.insert({"IFM_W", nonius::param{_input[2]}});
+    params.insert({"IFM_C", nonius::param{_input[3]}});
+
+    // output0: output feature map — dims [1]/[2]/[3] read as H/W/C.
+    auto _output = get_key_dims({"output0"}, info);
+    params.insert({"OFM_H", nonius::param{_output[1]}});
+    params.insert({"OFM_W", nonius::param{_output[2]}});
+    params.insert({"OFM_C", nonius::param{_output[3]}});
+
+    auto _stride_h = get_key_int({"stride_h"}, info);
+    auto _stride_w = get_key_int({"stride_w"}, info);
+    params.insert({"STRIDE_H", nonius::param{_stride_h}});
+    params.insert({"STRIDE_W", nonius::param{_stride_w}});
+
+    // Padding scheme name ("SAME"/"VALID"), consumed by calculatePadding.
+    auto _pad = get_key_string({"padding"}, info);
+    params.insert({"PADDING", nonius::param{_pad}});
+
+    return params;
+  }
+};
+
+} // namespace operation
+} // namespace kbenchmark
+
+#endif // __KBENCHMARK_OPERATIONS_TRANSPOSE_CONV_H__