2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * @file TransposeConv benchmark with various algorithms
21 #include <nonius/nonius.h++>
23 #include <arm_compute/core/Types.h>
24 #include <arm_compute/runtime/NEON/NEScheduler.h>
25 #include <arm_compute/runtime/NEON/NEFunctions.h>
31 #include "acl_common/Utils.h"
33 using namespace arm_compute;
34 using namespace kbenchmark::kernels::acl_common;
// One-argument overload: the visible statement returns a TensorInfo constructed from
// `shape`, the literal 1 as second argument and DataType::F32.
// NOTE(review): the line that defines `shape` is not part of this view; presumably it is a
// 1-D shape of extent N (bias_info() calls this overload with ker_N) - confirm.
48 TensorInfo make_info(uint32_t N)
51 return TensorInfo{shape, 1, DataType::F32};
// Layout-selected TensorInfo factory. Only declared here; the file provides explicit
// specialisations for NCHW and NHWC. The extents are always passed in N, C, H, W order,
// whichever layout is selected.
54 template <enum Layout> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W);
// NCHW specialisation: describes an F32 tensor whose shape is listed as {W, H, C, N} and
// whose data layout is tagged DataLayout::NCHW.
56 template <> TensorInfo make_info<NCHW>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
// Note the extent order differs from the parameter order (N, C, H, W).
58 TensorShape shape{W, H, C, N};
59 TensorInfo info{shape, 1, DataType::F32};
// The layout is set explicitly after construction.
60 info.set_data_layout(DataLayout::NCHW);
// NHWC specialisation: describes an F32 tensor whose shape is listed as {C, W, H, N} and
// whose data layout is tagged DataLayout::NHWC.
64 template <> TensorInfo make_info<NHWC>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
// Note the extent order differs from the parameter order (N, C, H, W).
66 TensorShape shape{C, W, H, N};
67 TensorInfo info{shape, 1, DataType::F32};
// The layout is set explicitly after construction.
68 info.set_data_layout(DataLayout::NHWC);
// Error reporter for arm_compute Status values: writes status.error_description() to
// std::cerr and throws std::runtime_error{"ERROR"}.
// NOTE(review): the condition guarding these two statements is not visible here; presumably
// they only run when `status` signals a failure - confirm.
72 inline void check(const Status &status)
76 std::cerr << status.error_description() << std::endl;
77 throw std::runtime_error{"ERROR"};
// Tells whether n is odd.
//
// @param n  value to test
// @return   true when n is not divisible by 2, false otherwise
//
// The comparison already yields a bool, so it is returned directly instead of being routed
// through a redundant `? true : false`.
inline bool is_odd(uint32_t n) { return (n % 2) != 0; }
86 // Benchmark Parameters
// Each NONIUS_PARAM(name, value) line declares a benchmark parameter that the Configuration
// constructor reads back through meter.param<name>(); the second argument is the value used
// when the parameter is not overridden (see the nonius documentation).
// Batch size: used as N for both the input and the output feature map.
88 NONIUS_PARAM(BATCH, 1);
// Input feature map: channels, height, width.
90 NONIUS_PARAM(IFM_C, 3);
91 NONIUS_PARAM(IFM_H, 244);
92 NONIUS_PARAM(IFM_W, 244);
// Output feature map: channels, height, width. OFM_C is also read as the kernel count.
94 NONIUS_PARAM(OFM_C, 3);
95 NONIUS_PARAM(OFM_H, 244);
96 NONIUS_PARAM(OFM_W, 244);
// Kernel height and width.
98 NONIUS_PARAM(KER_H, 3);
99 NONIUS_PARAM(KER_W, 3);
// Strides along the height and width axes.
101 NONIUS_PARAM(STRIDE_H, 1);
102 NONIUS_PARAM(STRIDE_W, 1);
// Padding scheme name; the string is handed to calculatePadding() by the constructor.
104 NONIUS_PARAM(PADDING, std::string{"SAME"})
107 // Configuration Helpers
// Strides, filled by the constructor from the STRIDE_H and STRIDE_W parameters.
129 uint32_t vertical_stride;
130 uint32_t horizontal_stride;
// Pad/stride descriptor produced by asPadStrideInfo() in the constructor; the benchmarks
// pass it to NEDeconvolutionLayer::validate() and ::configure().
132 PadStrideInfo deconv_info;
// Right-minus-left and bottom-minus-top padding differences computed by the constructor;
// also passed to validate() and configure().
134 uint32_t inner_border_right;
135 uint32_t inner_border_top;
// Reads every benchmark parameter from `meter` and derives the padding/stride settings that
// the deconvolution benchmarks hand to NEDeconvolutionLayer.
//
// @param meter  nonius chronometer whose param<>() accessors supply the parameter values
137 Configuration(nonius::chronometer meter)
// Input feature map extents.
139 ifm_N = meter.param<BATCH>();
140 ifm_C = meter.param<IFM_C>();
141 ifm_H = meter.param<IFM_H>();
142 ifm_W = meter.param<IFM_W>();
// Output feature map extents; the batch size is the same parameter as for the input.
144 ofm_N = meter.param<BATCH>();
145 ofm_C = meter.param<OFM_C>();
146 ofm_H = meter.param<OFM_H>();
147 ofm_W = meter.param<OFM_W>();
// Kernel extents: ker_N takes OFM_C and ker_C takes IFM_C.
149 ker_N = meter.param<OFM_C>();
150 ker_C = meter.param<IFM_C>();
151 ker_H = meter.param<KER_H>();
152 ker_W = meter.param<KER_W>();
154 vertical_stride = meter.param<STRIDE_H>();
155 horizontal_stride = meter.param<STRIDE_W>();
157 // NOTE The padding calculation formula of TransposeConv is opposite to Conv.
158 // So the location of ifm and ofm is changed.
159 auto padding_info = calculatePadding(meter.param<PADDING>(), ofm_H, ofm_W, ifm_H, ifm_W,
160 vertical_stride, horizontal_stride, ker_H, ker_W);
// These differences must be taken BEFORE left/top are overwritten below.
// NOTE(review): the destinations are uint32_t; if right < left (or bottom < top) can occur,
// the stored value would wrap - confirm that calculatePadding() rules this out.
162 inner_border_right = padding_info.right - padding_info.left;
163 inner_border_top = padding_info.bottom - padding_info.top;
// Make the padding symmetric by copying right into left and bottom into top.
165 padding_info.left = padding_info.right;
166 padding_info.top = padding_info.bottom;
// Combine the adjusted padding with the strides into the descriptor stored in deconv_info.
168 deconv_info = asPadStrideInfo(padding_info, vertical_stride, horizontal_stride);
// TensorInfo of the input feature map for layout L, built from ifm_N/C/H/W.
171 template <Layout L> TensorInfo src_info() const
173 return make_info<L>(ifm_N, ifm_C, ifm_H, ifm_W);
// TensorInfo of the output feature map for layout L, built from ofm_N/C/H/W.
175 template <Layout L> TensorInfo dst_info() const
177 return make_info<L>(ofm_N, ofm_C, ofm_H, ofm_W);
// TensorInfo of the kernel for layout L, built from ker_N/C/H/W
// (ker_N is passed in the N position and ker_C in the C position of make_info).
179 template <Layout L> TensorInfo ker_info() const
181 return make_info<L>(ker_N, ker_C, ker_H, ker_W);
// TensorInfo for a bias, built with the one-argument make_info() overload from ker_N.
// The two benchmarks visible in this file pass nullptr as the bias and do not call this.
183 TensorInfo bias_info(void) const { return make_info(ker_N); }
189 // Benchmark Implementations
// Accessor for the benchmark registry used by NONIUS_LOCAL_BENCHMARK and returned from
// benchmark_functions(); the registry object itself is declared `static`.
// NOTE(review): the return statement is not visible here; presumably it returns `registry`.
194 inline nonius::benchmark_registry &local_benchmark_registry()
196 static nonius::benchmark_registry registry;
// Defines a uniquely named static ::nonius::benchmark_registrar whose constructor receives
// local_benchmark_registry() and `name`.
// NOTE(review): the remaining constructor arguments sit on continuation lines that are not
// visible here; presumably the variadic arguments (the benchmark body) are forwarded.
202 #define NONIUS_LOCAL_BENCHMARK(name, ...) \
205 static ::nonius::benchmark_registrar \
206 NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, \
// Benchmark of NEDeconvolutionLayer on NCHW tensors. No bias is used: nullptr is passed in
// the bias position of both validate() and configure().
210 NONIUS_LOCAL_BENCHMARK("NEDeconvolutionLayer_NCHW", [](nonius::chronometer meter) {
211 NEDeconvolutionLayer deconv;
// Collect the benchmark parameters and the derived padding/stride settings.
214 Configuration p{meter};
// Describe the tensors (metadata only; memory is allocated further down).
220 src_tensor.allocator()->init(p.src_info<NCHW>());
221 dst_tensor.allocator()->init(p.dst_info<NCHW>());
222 ker_tensor.allocator()->init(p.ker_info<NCHW>());
// Validate the configuration first; the returned Status is handed to check().
226 check(deconv.validate(src_tensor.info(), ker_tensor.info(), nullptr, dst_tensor.info(),
227 p.deconv_info, p.inner_border_right, p.inner_border_top));
// NOTE(review): the body of this measure() call and the code around it are not visible
// here - TODO confirm under which condition it runs and what it measures.
231 meter.measure([&](int) {
// Configure the layer with the same arguments that were validated above.
239 deconv.configure(&src_tensor, &ker_tensor, nullptr, &dst_tensor, p.deconv_info,
240 p.inner_border_right, p.inner_border_top);
// Allocate the tensors after configure().
242 src_tensor.allocator()->allocate();
243 ker_tensor.allocator()->allocate();
244 dst_tensor.allocator()->allocate();
// Only deconv.run() is inside this measured lambda.
247 meter.measure([&](int) { deconv.run(); });
// Benchmark of NEDeconvolutionLayer on NHWC tensors. No bias is used: nullptr is passed in
// the bias position of both validate() and configure().
250 NONIUS_LOCAL_BENCHMARK("NEDeconvolutionLayer_NHWC", [](nonius::chronometer meter) {
251 NEDeconvolutionLayer deconv;
// Collect the benchmark parameters and the derived padding/stride settings.
254 Configuration p{meter};
// Describe the tensors (metadata only; memory is allocated further down).
260 src_tensor.allocator()->init(p.src_info<NHWC>());
261 dst_tensor.allocator()->init(p.dst_info<NHWC>());
262 ker_tensor.allocator()->init(p.ker_info<NHWC>());
// Validate the configuration first; the returned Status is handed to check().
266 check(deconv.validate(src_tensor.info(), ker_tensor.info(), nullptr, dst_tensor.info(),
267 p.deconv_info, p.inner_border_right, p.inner_border_top));
// NOTE(review): the body of this measure() call and the code around it are not visible
// here - TODO confirm under which condition it runs and what it measures.
271 meter.measure([&](int) {
// Configure the layer with the same arguments that were validated above.
279 deconv.configure(&src_tensor, &ker_tensor, nullptr, &dst_tensor, p.deconv_info,
280 p.inner_border_right, p.inner_border_top);
// Allocate the tensors after configure().
282 src_tensor.allocator()->allocate();
283 ker_tensor.allocator()->allocate();
284 dst_tensor.allocator()->allocate();
// Only deconv.run() is inside this measured lambda.
287 meter.measure([&](int) { deconv.run(); });
// C-linkage entry point that returns the registry obtained from local_benchmark_registry(),
// i.e. the one that NONIUS_LOCAL_BENCHMARK passes to its registrar.
// NOTE(review): presumably looked up by symbol name by the benchmark driver (hence
// extern "C") - confirm against the loader.
290 extern "C" nonius::benchmark_registry &benchmark_functions(void)
292 return local_benchmark_registry();