2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * @file Conv2D benchmark with various algorithms (draft version)
21 #include <nonius/nonius.h++>
23 #include <arm_compute/core/Types.h>
24 #include <arm_compute/runtime/NEON/NEScheduler.h>
25 #include <arm_compute/runtime/NEON/NEFunctions.h>
31 using namespace arm_compute;
// Builds the F32 TensorInfo used for the bias tensor (bias_info() calls this with ker_N).
// The literal 1 is passed as the second TensorInfo constructor argument.
// NOTE(review): the declaration of `shape` is not visible in this view -- presumably a
// one-dimensional TensorShape built from N; confirm against the full file.
45 TensorInfo make_info(uint32_t N)
48 return TensorInfo{shape, 1, DataType::F32};
// Primary template (declaration only): builds a TensorInfo from N, C, H, W extents for the
// layout chosen by the template argument. The visible code specializes it for NCHW and NHWC.
51 template <enum Layout> TensorInfo make_info(uint32_t N, uint32_t C, uint32_t H, uint32_t W);
// NCHW specialization: the extents are handed to TensorShape in the order {W, H, C, N},
// the element type is F32, and the info is tagged with DataLayout::NCHW.
// NOTE(review): the return statement is not visible in this view -- presumably `info` is returned.
53 template <> TensorInfo make_info<NCHW>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
55 TensorShape shape{W, H, C, N};
56 TensorInfo info{shape, 1, DataType::F32};
57 info.set_data_layout(DataLayout::NCHW);
// NHWC specialization: the extents are handed to TensorShape in the order {C, W, H, N},
// the element type is F32, and the info is tagged with DataLayout::NHWC.
// NOTE(review): the return statement is not visible in this view -- presumably `info` is returned.
61 template <> TensorInfo make_info<NHWC>(uint32_t N, uint32_t C, uint32_t H, uint32_t W)
63 TensorShape shape{C, W, H, N};
64 TensorInfo info{shape, 1, DataType::F32};
65 info.set_data_layout(DataLayout::NHWC);
// Error-reporting helper for arm_compute::Status values: writes the status' error description
// to std::cerr and throws std::runtime_error with the message "ERROR".
// NOTE(review): the condition guarding these two statements is not visible in this view --
// presumably they only execute when `status` reports a failure; confirm against the full file.
69 inline void check(const Status &status)
73 std::cerr << status.error_description() << std::endl;
74 throw std::runtime_error{"ERROR"};
// Returns true when `n` is odd, false when it is even (including 0).
inline constexpr bool is_odd(uint32_t n) { return (n % 2u) != 0u; }
83 // Benchmark Parameters
// Each NONIUS_PARAM names a benchmark parameter and supplies a value for it; the Configuration
// constructor reads them back through meter.param<NAME>().

// Batch size; Configuration uses it for both the input and the output N extent.
85 NONIUS_PARAM(BATCH, 1);
// Input feature map extents: channels, height, width.
87 NONIUS_PARAM(IFM_C, 3);
88 NONIUS_PARAM(IFM_H, 244);
89 NONIUS_PARAM(IFM_W, 244);
// Requested output feature map extents: channels, height, width.
91 NONIUS_PARAM(OFM_C, 3);
92 NONIUS_PARAM(OFM_H, 244);
93 NONIUS_PARAM(OFM_W, 244);
// Kernel spatial extents; the kernel's N and C extents are taken from OFM_C and IFM_C.
95 NONIUS_PARAM(KER_H, 3);
96 NONIUS_PARAM(KER_W, 3);
// Strides along the height and width axes.
98 NONIUS_PARAM(STRIDE_H, 1);
99 NONIUS_PARAM(STRIDE_W, 1);
// String-valued parameters. Configuration stores both, but the visible code derives the actual
// padding amounts from the IFM/OFM/kernel extents rather than from PADDING.
101 NONIUS_PARAM(PADDING, std::string{"SAME"})
102 NONIUS_PARAM(FUSED_ACT, std::string{"RELU"})
105 // Configuration Helpers
// Strides along the height (vertical) and width (horizontal) axes, read from STRIDE_H / STRIDE_W.
127 uint32_t vertical_stride;
128 uint32_t horizontal_stride;
// Value of the FUSED_ACT parameter; it is stored but not consumed by any visible code.
131 std::string fused_act;
// Per-side padding amounts, computed by the constructor from the IFM, OFM and kernel extents.
133 uint32_t top_padding;
134 uint32_t bottom_padding;
135 uint32_t left_padding;
136 uint32_t right_padding;
138 Configuration(nonius::chronometer meter)
140 ifm_N = meter.param<BATCH>();
141 ifm_C = meter.param<IFM_C>();
142 ifm_H = meter.param<IFM_H>();
143 ifm_W = meter.param<IFM_W>();
145 ofm_N = meter.param<BATCH>();
146 ofm_C = meter.param<OFM_C>();
147 ofm_H = meter.param<OFM_H>();
148 ofm_W = meter.param<OFM_W>();
150 ker_N = meter.param<OFM_C>();
151 ker_C = meter.param<IFM_C>();
152 ker_H = meter.param<KER_H>();
153 ker_W = meter.param<KER_W>();
155 vertical_stride = meter.param<STRIDE_H>();
156 horizontal_stride = meter.param<STRIDE_W>();
158 padding = meter.param<PADDING>();
159 fused_act = meter.param<FUSED_ACT>();
161 assert((ifm_H - ker_H) % vertical_stride == 0);
162 assert((ifm_W - ker_H) % horizontal_stride == 0);
164 uint32_t const effective_ofm_H = (ifm_H - ker_H) / vertical_stride + 1;
165 uint32_t const effective_ofm_W = (ifm_W - ker_H) / horizontal_stride + 1;
167 assert(ofm_H >= effective_ofm_H);
168 assert(ofm_W >= effective_ofm_W);
170 uint32_t const pad_H = ofm_H - effective_ofm_H;
171 uint32_t const pad_W = ofm_W - effective_ofm_W;
173 top_padding = pad_H / 2;
174 bottom_padding = pad_H / 2;
175 left_padding = pad_W / 2;
176 right_padding = pad_W / 2;
// TensorInfo of the input feature map in layout L, built from the ifm_* extents.
184 template <Layout L> TensorInfo src_info() const
186 return make_info<L>(ifm_N, ifm_C, ifm_H, ifm_W);
// TensorInfo of the output feature map in layout L, built from the ofm_* extents.
188 template <Layout L> TensorInfo dst_info() const
190 return make_info<L>(ofm_N, ofm_C, ofm_H, ofm_W);
// TensorInfo of the kernel (weights) in layout L, built from the ker_* extents.
192 template <Layout L> TensorInfo ker_info() const
194 return make_info<L>(ker_N, ker_C, ker_H, ker_W);
// TensorInfo of the bias tensor, sized by ker_N (which the constructor sets from OFM_C).
196 TensorInfo bias_info(void) const { return make_info(ker_N); }
// Packs the strides and padding into an arm_compute::PadStrideInfo, with the horizontal stride
// as the first argument and FLOOR as the dimension rounding type.
// NOTE(review): the arguments between horizontal_stride and the rounding type are not visible
// in this view -- presumably the vertical stride followed by the four padding members; confirm.
198 PadStrideInfo pad_stride_info(void) const
200 return PadStrideInfo{horizontal_stride,
206 DimensionRoundingType::FLOOR};
213 // Benchmark Implementations
// Accessor for this file's own benchmark registry, held as a function-local static so it is
// constructed on first use. NONIUS_LOCAL_BENCHMARK registers into it and benchmark_functions()
// exposes it.
// NOTE(review): the return statement is not visible in this view -- presumably `registry` is returned.
218 inline nonius::benchmark_registry &local_benchmark_registry()
220 static nonius::benchmark_registry registry;
// Registers a benchmark under `name` in local_benchmark_registry() by defining a static
// nonius::benchmark_registrar with a unique identifier; the remaining macro arguments are
// forwarded to the registrar.
// NOTE(review): some continuation lines of this macro are not visible in this view.
226 #define NONIUS_LOCAL_BENCHMARK(name, ...) \
229 static ::nonius::benchmark_registrar \
230 NONIUS_DETAIL_UNIQUE_NAME(benchmark_registrar)(local_benchmark_registry(), name, __VA_ARGS__); \
// Benchmark: NEDirectConvolutionLayer on F32 tensors described in NCHW layout.
// NOTE(review): the declarations of src_tensor, dst_tensor and ker_tensor are not visible in
// this view.
233 NONIUS_LOCAL_BENCHMARK("NEDirectConvolutionLayer_NCHW", [](nonius::chronometer meter) {
234 NEDirectConvolutionLayer conv;
// Read the benchmark parameters and derive stride/padding information from them.
237 Configuration p{meter};
242 Tensor bias_tensor{};
// Describe each tensor with its TensorInfo; allocate() is only called later, after configure().
244 src_tensor.allocator()->init(p.src_info<NCHW>());
245 dst_tensor.allocator()->init(p.dst_info<NCHW>());
246 ker_tensor.allocator()->init(p.ker_info<NCHW>());
247 bias_tensor.allocator()->init(p.bias_info());
// Ask the function to validate this configuration; the resulting Status is passed to check().
251 check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
252 p.pad_stride_info()));
// NOTE(review): the body of this measure() call and the control flow around it are not visible
// in this view.
256 meter.measure([&](int) {
264 conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
// Allocate the backing memory of all four tensors.
266 src_tensor.allocator()->allocate();
267 ker_tensor.allocator()->allocate();
268 bias_tensor.allocator()->allocate();
269 dst_tensor.allocator()->allocate();
// Measured section: only conv.run() is inside the callable handed to the chronometer.
272 meter.measure([&](int) { conv.run(); });
// Benchmark: NEDirectConvolutionLayer on F32 tensors described in NHWC layout.
// NOTE(review): the declarations of src_tensor, dst_tensor and ker_tensor are not visible in
// this view.
275 NONIUS_LOCAL_BENCHMARK("NEDirectConvolutionLayer_NHWC", [](nonius::chronometer meter) {
276 NEDirectConvolutionLayer conv;
// Read the benchmark parameters and derive stride/padding information from them.
279 Configuration p{meter};
284 Tensor bias_tensor{};
// Describe each tensor with its TensorInfo; allocate() is only called later, after configure().
286 src_tensor.allocator()->init(p.src_info<NHWC>());
287 dst_tensor.allocator()->init(p.dst_info<NHWC>());
288 ker_tensor.allocator()->init(p.ker_info<NHWC>());
289 bias_tensor.allocator()->init(p.bias_info());
// Ask the function to validate this configuration; the resulting Status is passed to check().
293 check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
294 p.pad_stride_info()));
// NOTE(review): the body of this measure() call and the control flow around it are not visible
// in this view.
298 meter.measure([&](int) {
306 conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
// Allocate the backing memory of all four tensors.
308 src_tensor.allocator()->allocate();
309 ker_tensor.allocator()->allocate();
310 bias_tensor.allocator()->allocate();
311 dst_tensor.allocator()->allocate();
// Measured section: only conv.run() is inside the callable handed to the chronometer.
314 meter.measure([&](int) { conv.run(); });
// Benchmark: NEGEMMConvolutionLayer on F32 tensors described in NCHW layout.
// NOTE(review): the declarations of src_tensor, dst_tensor and ker_tensor are not visible in
// this view.
317 NONIUS_LOCAL_BENCHMARK("NEGEMMConvolutionLayer_NCHW", [](nonius::chronometer meter) {
318 NEGEMMConvolutionLayer conv;
// Read the benchmark parameters and derive stride/padding information from them.
321 Configuration p{meter};
326 Tensor bias_tensor{};
// Describe each tensor with its TensorInfo; allocate() is only called later, after configure().
328 src_tensor.allocator()->init(p.src_info<NCHW>());
329 dst_tensor.allocator()->init(p.dst_info<NCHW>());
330 ker_tensor.allocator()->init(p.ker_info<NCHW>());
331 bias_tensor.allocator()->init(p.bias_info());
// Ask the function to validate this configuration; the resulting Status is passed to check().
335 check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
336 p.pad_stride_info()));
// NOTE(review): the body of this measure() call and the control flow around it are not visible
// in this view.
340 meter.measure([&](int) {
348 conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
// Allocate the backing memory of all four tensors.
350 src_tensor.allocator()->allocate();
351 ker_tensor.allocator()->allocate();
352 bias_tensor.allocator()->allocate();
353 dst_tensor.allocator()->allocate();
// Measured section: only conv.run() is inside the callable handed to the chronometer.
356 meter.measure([&](int) { conv.run(); });
// Benchmark: NEGEMMConvolutionLayer on F32 tensors described in NHWC layout.
// NOTE(review): the declarations of src_tensor, dst_tensor and ker_tensor are not visible in
// this view.
359 NONIUS_LOCAL_BENCHMARK("NEGEMMConvolutionLayer_NHWC", [](nonius::chronometer meter) {
360 NEGEMMConvolutionLayer conv;
// Read the benchmark parameters and derive stride/padding information from them.
363 Configuration p{meter};
368 Tensor bias_tensor{};
// Describe each tensor with its TensorInfo; allocate() is only called later, after configure().
370 src_tensor.allocator()->init(p.src_info<NHWC>());
371 dst_tensor.allocator()->init(p.dst_info<NHWC>());
372 ker_tensor.allocator()->init(p.ker_info<NHWC>());
373 bias_tensor.allocator()->init(p.bias_info());
// Ask the function to validate this configuration; the resulting Status is passed to check().
377 check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
378 p.pad_stride_info()));
// NOTE(review): the body of this measure() call and the control flow around it are not visible
// in this view.
382 meter.measure([&](int) {
390 conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
// Allocate the backing memory of all four tensors.
392 src_tensor.allocator()->allocate();
393 ker_tensor.allocator()->allocate();
394 bias_tensor.allocator()->allocate();
395 dst_tensor.allocator()->allocate();
// Measured section: only conv.run() is inside the callable handed to the chronometer.
398 meter.measure([&](int) { conv.run(); });
// Benchmark: NEWinogradConvolutionLayer on F32 tensors described in NCHW layout.
// NOTE(review): the declarations of src_tensor, dst_tensor and ker_tensor are not visible in
// this view.
401 NONIUS_LOCAL_BENCHMARK("NEWinogradConvolutionLayer_NCHW", [](nonius::chronometer meter) {
402 NEWinogradConvolutionLayer conv;
// Read the benchmark parameters and derive stride/padding information from them.
405 Configuration p{meter};
410 Tensor bias_tensor{};
// Describe each tensor with its TensorInfo; allocate() is only called later, after configure().
412 src_tensor.allocator()->init(p.src_info<NCHW>());
413 dst_tensor.allocator()->init(p.dst_info<NCHW>());
414 ker_tensor.allocator()->init(p.ker_info<NCHW>());
415 bias_tensor.allocator()->init(p.bias_info());
// Ask the function to validate this configuration; the resulting Status is passed to check().
419 check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
420 p.pad_stride_info()));
// NOTE(review): the body of this measure() call and the control flow around it are not visible
// in this view.
424 meter.measure([&](int) {
432 conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
// Allocate the backing memory of all four tensors.
434 src_tensor.allocator()->allocate();
435 ker_tensor.allocator()->allocate();
436 bias_tensor.allocator()->allocate();
437 dst_tensor.allocator()->allocate();
// Measured section: only conv.run() is inside the callable handed to the chronometer.
440 meter.measure([&](int) { conv.run(); });
// Benchmark: NEWinogradConvolutionLayer on F32 tensors described in NHWC layout.
// NOTE(review): the declarations of src_tensor, dst_tensor and ker_tensor are not visible in
// this view.
443 NONIUS_LOCAL_BENCHMARK("NEWinogradConvolutionLayer_NHWC", [](nonius::chronometer meter) {
444 NEWinogradConvolutionLayer conv;
// Read the benchmark parameters and derive stride/padding information from them.
447 Configuration p{meter};
452 Tensor bias_tensor{};
// Describe each tensor with its TensorInfo; allocate() is only called later, after configure().
454 src_tensor.allocator()->init(p.src_info<NHWC>());
455 dst_tensor.allocator()->init(p.dst_info<NHWC>());
456 ker_tensor.allocator()->init(p.ker_info<NHWC>());
457 bias_tensor.allocator()->init(p.bias_info());
// Ask the function to validate this configuration; the resulting Status is passed to check().
461 check(conv.validate(src_tensor.info(), ker_tensor.info(), bias_tensor.info(), dst_tensor.info(),
462 p.pad_stride_info()));
// NOTE(review): the body of this measure() call and the control flow around it are not visible
// in this view.
466 meter.measure([&](int) {
474 conv.configure(&src_tensor, &ker_tensor, &bias_tensor, &dst_tensor, p.pad_stride_info());
// Allocate the backing memory of all four tensors.
476 src_tensor.allocator()->allocate();
477 ker_tensor.allocator()->allocate();
478 bias_tensor.allocator()->allocate();
479 dst_tensor.allocator()->allocate();
// Measured section: only conv.run() is inside the callable handed to the chronometer.
482 meter.measure([&](int) { conv.run(); });
// C-linkage entry point that returns the registry holding every benchmark registered in this
// file through NONIUS_LOCAL_BENCHMARK.
// NOTE(review): presumably looked up by name by an external benchmark runner -- confirm.
485 extern "C" nonius::benchmark_registry &benchmark_functions(void)
487 return local_benchmark_registry();