2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "KernelGenerator.h"
19 #include <arm_compute/runtime/CL/CLFunctions.h> // Include all ARM Compute CL functions
20 #include <arm_compute/runtime/CL/CLFunctionsEx.h> // Include all ARM Compute EX CL functions
22 #include <AclActivationBuilder.h>
23 #include <AclFunction.h>
28 #include "ir/DataType.h"
29 #include "ir/InternalType.h"
30 #include "exec/NopFunction.h"
31 #include "exec/FunctionSequence.h"
32 #include "util/logging.h"
33 #include "util/Utils.h"
34 #include "AclKernelGen.h"
// Shorthands used by every visit() below: asAclClFunction wraps a raw
// arm_compute::IFunction into the backend function type, and ActivationBuilder
// generates the (optional) fused-activation kernel appended after an op's main kernel.
using ::onert::backend::acl_common::asAclClFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
    ::arm_compute::ICLTensor, ::arm_compute::CLActivationLayer, acl_common::AclClFunction>;
47 KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
48 const ir::Operations &operations_ctx,
49 const std::shared_ptr<TensorBuilder> &tensor_builder)
50 : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
51 _current_op_seq_layout(ir::Layout::UNKNOWN)
56 void KernelGenerator::visit(const ir::OpSequence &op_seq)
58 // TODO Move this to IKernelGenerator
59 // (all derivatives have the same implementation for this)
60 assert(!_return_fn_seq);
61 _return_fn_seq = std::make_unique<exec::FunctionSequence>();
62 _return_fn_seq->enableDynamicShapeInferer(false);
64 _current_op_seq_layout = op_seq.getLayout();
65 for (const auto &operation_idx : op_seq.operations())
67 const auto &node = _operations_ctx.at(operation_idx);
69 _return_fn_seq->append(releaseFunction());
73 void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
75 const auto ofm_index{node.getOutputs().at(0)};
76 const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
77 const auto block_size_index{
78 node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};
80 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
81 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
82 auto block_size_tensor = _tensor_builder->at(block_size_index).get();
84 assert(_ctx.at(block_size_index).data());
86 auto fn = std::make_unique<::arm_compute::CLBatchToSpaceLayer>();
88 fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());
90 auto acl_fn = asAclClFunction(std::move(fn));
92 _return_fn = std::move(acl_fn);
95 void KernelGenerator::visit(const ir::operation::Cast &node)
97 const auto ofm_index{node.getOutputs().at(0)};
98 const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};
100 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
101 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
103 std::unique_ptr<::arm_compute::IFunction> fn;
104 if (ifm_tensor->data_type() == ofm_tensor->data_type())
106 auto l = std::make_unique<::arm_compute::CLCopy>();
108 l->configure(ifm_tensor->handle(), ofm_tensor->handle());
114 auto l = std::make_unique<::arm_compute::CLCast>();
116 // TODO Support converting float to int32 as round down
117 l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);
122 auto acl_fn = asAclClFunction(std::move(fn));
124 _return_fn = std::move(acl_fn);
127 void KernelGenerator::visit(const ir::operation::Conv2D &node)
129 using ir::operation::Conv2D;
131 const auto ofm_index{node.getOutputs().at(0)};
132 const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
133 const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
134 const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};
136 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
137 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
138 // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
139 const auto &ker_shape = _ctx.at(ker_index).shape();
140 const auto ker_height = ker_shape.dim(1);
141 const auto ker_width = ker_shape.dim(2);
143 const auto stride = node.param().stride;
144 const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
145 ker_width, ker_height);
146 const auto activation = node.param().activation;
148 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
149 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
150 auto ker_tensor = _tensor_builder->at(ker_index).get();
151 auto bias_tensor = _tensor_builder->at(bias_index).get();
153 const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
154 const auto act_info = acl_common::asActivationLayerInfo(activation);
156 auto fn = std::make_unique<::arm_compute::CLConvolutionLayer>(
157 _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
159 fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
160 ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
161 ::arm_compute::Size2D(1U, 1U), act_info);
163 _return_fn = asAclClFunction(std::move(fn));
166 void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
168 using ir::operation::DepthwiseConv2D;
170 const auto ofm_index{node.getOutputs().at(0)};
171 const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
172 const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
173 const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};
175 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
176 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
177 // Kernel format is [1, kernel_height, kernel_width, depth_out].
178 const auto &ker_shape = _ctx.at(ker_index).shape();
179 const auto ker_height = ker_shape.dim(1);
180 const auto ker_width = ker_shape.dim(2);
182 const auto stride = node.param().stride;
183 const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
184 ker_width, ker_height);
185 const auto multiplier = node.param().multiplier;
186 const auto activation = node.param().activation;
188 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
189 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
190 auto ker_tensor = _tensor_builder->at(ker_index).get();
191 auto bias_tensor = _tensor_builder->at(bias_index).get();
193 const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
194 const auto act_info = acl_common::asActivationLayerInfo(activation);
197 auto fn = std::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
199 fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
200 ofm_tensor->handle(), conv_info, multiplier, act_info);
202 _return_fn = asAclClFunction(std::move(fn));
206 void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
208 auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
209 node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);
211 const auto ofm_index{node.getOutputs().at(0)};
212 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
213 const auto activation = node.param().activation;
214 _return_fn = std::make_unique<exec::FunctionSequence>(
215 asAclClFunction(std::move(raw_fn)),
216 ActivationBuilder::generate(activation, ofm_tensor->handle()));
219 void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
221 auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
222 node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);
224 const auto ofm_index{node.getOutputs().at(0)};
225 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
226 const auto activation = node.param().activation;
227 _return_fn = std::make_unique<exec::FunctionSequence>(
228 asAclClFunction(std::move(raw_fn)),
229 ActivationBuilder::generate(activation, ofm_tensor->handle()));
232 void KernelGenerator::visit(const ir::operation::Concat &node)
234 const auto ofm_index{node.getOutputs().at(0)};
236 std::vector<ir::OperandIndex> input_indexes;
238 for (const auto &input : node.getInputs())
239 input_indexes.emplace_back(input);
241 const auto axis = node.param().axis;
243 // Concat elimination check
244 bool eliminated = _tensor_builder->areSubTensorsOf(ofm_index, node.getInputs());
247 // If concat eliminated, return a NOP IFunction
248 VERBOSE(acl_cl_KernelGenerator_Concat) << "Concat eliminated" << std::endl;
249 _return_fn = std::make_unique<exec::NopFunction>();
253 auto output_tensor = _tensor_builder->at(ofm_index).get();
254 std::vector<::arm_compute::ICLTensor *> input_tensors;
255 for (auto &ifm_ind : input_indexes)
256 input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());
258 std::unique_ptr<::arm_compute::IFunction> fn;
259 if (input_indexes.size() < 2)
261 auto l = std::make_unique<::arm_compute::CLCopy>();
262 l->configure(input_tensors.at(0), output_tensor->handle());
267 auto l = std::make_unique<::arm_compute::CLConcatenateLayer>();
268 const auto rank = _ctx.at(ofm_index).shape().rank();
269 const auto frontend_layout = _current_op_seq_layout;
270 const auto backend_layout = output_tensor->layout();
271 const auto fixed_axis =
272 acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
273 l->configure(input_tensors, output_tensor->handle(), fixed_axis);
277 auto acl_fn = asAclClFunction(std::move(fn));
279 _return_fn = std::move(acl_fn);
282 void KernelGenerator::visit(const ir::operation::FullyConnected &node)
284 const auto output_index{node.getOutputs().at(0)};
285 auto output_tensor = _tensor_builder->at(output_index).get();
286 const auto activation = node.param().activation;
288 auto fn = acl_common::kernelGenFullyConnected<acl_common::AclClFunction, ::arm_compute::ICLTensor,
289 ::arm_compute::CLFullyConnectedReshapingLayer>(
290 node, _ctx, _tensor_builder, _current_op_seq_layout);
291 _return_fn = std::make_unique<exec::FunctionSequence>(
292 std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
295 void KernelGenerator::visit(const ir::operation::Mul &node)
297 const auto ofm_index{node.getOutputs().at(0)};
298 const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
299 const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};
301 const auto activation = node.param().activation;
303 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
304 auto lhs_tensor = _tensor_builder->at(lhs_index).get();
305 auto rhs_tensor = _tensor_builder->at(rhs_index).get();
307 auto fn = std::make_unique<::arm_compute::CLPixelWiseMultiplication>();
309 fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
310 arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
312 _return_fn = std::make_unique<exec::FunctionSequence>(
313 asAclClFunction(std::move(fn)),
314 ActivationBuilder::generate(activation, ofm_tensor->handle()));
317 void KernelGenerator::visit(const ir::operation::Reduce &node)
319 const auto output_index{node.getOutputs().at(0)};
320 const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
321 const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};
322 const auto keep_dims{node.param().keep_dims};
323 const auto reduce_type = node.param().reduce_type;
325 auto output_tensor = _tensor_builder->at(output_index).get();
326 auto input_tensor = _tensor_builder->at(input_index).get();
328 // Convert to ACL axes taking into account negative values and possible duplicates.
329 const auto &axes = _ctx.at(axes_index);
330 const auto input_rank = _ctx.at(input_index).shape().rank();
331 const auto frontend_layout = _current_op_seq_layout;
332 const auto backend_layout = input_tensor->layout();
334 std::unique_ptr<arm_compute::IFunction> fn;
335 if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
337 auto l = std::make_unique<::arm_compute::CLReduceMean>();
339 const auto acl_axes =
340 acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
341 l->configure(input_tensor->handle(), acl_axes, keep_dims, output_tensor->handle());
347 auto l = std::make_unique<::arm_compute::CLReduceOperation>(
348 _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
350 const auto acl_axes = acl_common::asSet(axes, input_rank, frontend_layout, backend_layout);
351 l->configure(input_tensor->handle(), output_tensor->handle(), acl_axes, keep_dims,
352 acl_common::convertReduceType(reduce_type));
357 auto acl_fn = asAclClFunction(std::move(fn));
359 _return_fn = std::move(acl_fn);
362 void KernelGenerator::visit(const ir::operation::Reshape &node)
364 const auto output_index{node.getOutputs().at(0)};
365 const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};
367 auto output_tensor = _tensor_builder->at(output_index).get();
368 auto input_tensor = _tensor_builder->at(input_index).get();
370 // NOTE This operation must not be changed the layout from frontend to backend
371 // So, PermutationOperationPass makes layouts of frontend and backend the same.
372 const auto frontend_layout = _current_op_seq_layout;
373 const auto backend_layout = output_tensor->layout();
374 assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
375 frontend_layout == backend_layout);
376 UNUSED_RELEASE(frontend_layout);
377 UNUSED_RELEASE(backend_layout);
379 auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
381 fn->configure(input_tensor->handle(), output_tensor->handle());
383 auto acl_fn = asAclClFunction(std::move(fn));
385 _return_fn = std::move(acl_fn);
388 void KernelGenerator::visit(const ir::operation::Squeeze &node)
390 // Squeeze is identical to reshape except that it has an optional dimensions input.
391 // In addition, optional dims_index is ignored since output tensor already has squeezed shape
392 // by freezer and toco
393 // TODO Support multi-layout for frontend and backend
394 const auto output_index{node.getOutputs().at(0)};
395 const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
396 const auto dims{node.param().dims};
397 const auto ndim{node.param().ndim};
401 auto output_tensor = _tensor_builder->at(output_index).get();
402 auto input_tensor = _tensor_builder->at(input_index).get();
403 auto fn = std::make_unique<arm_compute::CLReshapeLayer>();
404 fn->configure(input_tensor->handle(), output_tensor->handle());
405 auto acl_fn = asAclClFunction(std::move(fn));
406 _return_fn = std::move(acl_fn);
409 void KernelGenerator::visit(const ir::operation::Tanh &node)
411 const auto output_index{node.getOutputs().at(0)};
412 const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};
414 auto output_tensor = _tensor_builder->at(output_index).get();
415 auto input_tensor = _tensor_builder->at(input_index).get();
417 auto fn = std::make_unique<arm_compute::CLActivationLayer>();
419 const ::arm_compute::ActivationLayerInfo act_info{
420 ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
422 fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
424 auto acl_fn = asAclClFunction(std::move(fn));
426 _return_fn = std::move(acl_fn);
429 void KernelGenerator::visit(const ir::operation::Softmax &node)
431 const auto output_index{node.getOutputs().at(0)};
432 const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
434 const auto beta = node.param().beta;
436 auto output_tensor = _tensor_builder->at(output_index).get();
437 auto input_tensor = _tensor_builder->at(input_index).get();
439 auto fn = std::make_unique<::arm_compute::CLSoftmaxLayer>(
440 _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
442 fn->configure(input_tensor->handle(), output_tensor->handle(), beta);
444 auto acl_fn = asAclClFunction(std::move(fn));
446 _return_fn = std::move(acl_fn);
449 void KernelGenerator::visit(const ir::operation::Slice &node)
451 const auto output_index{node.getOutputs().at(0)};
452 const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
453 const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
454 const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};
456 auto outputData_tensor = _tensor_builder->at(output_index).get();
457 auto inputData_tensor = _tensor_builder->at(input_index).get();
458 const auto frontend_layout = _current_op_seq_layout;
459 const auto backend_layout = inputData_tensor->layout();
461 // Set initializers for indices data such as order of inputData
462 int input_rank = _ctx.at(input_index).shape().rank();
463 std::vector<int32_t> starts;
464 std::vector<int32_t> ends;
465 starts.resize(input_rank, 0);
466 ends.resize(input_rank, 0);
468 assert(_ctx.at(begins_index).data());
469 assert(_ctx.at(sizes_index).data());
470 auto beginData_base = _ctx.at(begins_index).data()->base();
471 auto sizeData_base = _ctx.at(sizes_index).data()->base();
472 const int beginData_size = _ctx.at(begins_index).shape().num_elements();
473 const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();
477 UNUSED_RELEASE(beginData_size);
478 UNUSED_RELEASE(sizeData_size);
480 assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
481 assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
482 assert(beginData_size == input_rank);
483 assert(sizeData_size == input_rank);
485 assert(beginData_base != nullptr);
486 for (int n = 0; n < input_rank; ++n)
488 auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
492 int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
493 starts[axis] = begin_value;
495 int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
496 ends[axis] = begin_value + size_value;
500 ::arm_compute::Coordinates starts_set;
501 ::arm_compute::Coordinates ends_set;
503 for (size_t i = 0; i < starts.size(); ++i)
505 starts_set.set(i, starts[i]);
506 ends_set.set(i, ends[i]);
509 auto fn = std::make_unique<::arm_compute::CLSlice>();
511 fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);
513 auto acl_fn = asAclClFunction(std::move(fn));
515 _return_fn = std::move(acl_fn);
518 void KernelGenerator::visit(const ir::operation::StridedSlice &node)
520 const auto output_index{node.getOutputs().at(0)};
521 const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
522 const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
523 const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
524 const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};
526 auto outputData_tensor = _tensor_builder->at(output_index).get();
527 auto inputData_tensor = _tensor_builder->at(input_index).get();
528 const auto frontend_layout = _current_op_seq_layout;
529 const auto backend_layout = inputData_tensor->layout();
531 // Set initializers for indices data such as order of inputData
532 int input_rank = _ctx.at(input_index).shape().rank();
533 std::vector<int32_t> starts;
534 std::vector<int32_t> ends;
535 std::vector<int32_t> strides;
536 starts.resize(input_rank, 0);
537 ends.resize(input_rank, 0);
538 strides.resize(input_rank, 0);
540 assert(_ctx.at(starts_index).data());
541 assert(_ctx.at(ends_index).data());
542 assert(_ctx.at(strides_index).data());
543 auto startData_base = _ctx.at(starts_index).data()->base();
544 auto endData_base = _ctx.at(ends_index).data()->base();
545 auto stridesData_base = _ctx.at(strides_index).data()->base();
546 const int startData_size = _ctx.at(starts_index).shape().num_elements();
547 const int endData_size = _ctx.at(ends_index).shape().num_elements();
548 const int stridesData_size = _ctx.at(strides_index).shape().num_elements();
552 UNUSED_RELEASE(startData_size);
553 UNUSED_RELEASE(endData_size);
554 UNUSED_RELEASE(stridesData_size);
556 assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
557 assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
558 assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
559 assert(startData_size == input_rank);
560 assert(endData_size == input_rank);
561 assert(stridesData_size == input_rank);
563 assert(startData_base != nullptr);
564 for (int n = 0; n < input_rank; ++n)
566 auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
570 int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
571 starts[axis] = start_value;
573 int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
574 ends[axis] = end_value;
576 int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
577 strides[axis] = strides_value;
581 // Set mask bits such as order of inputData
582 const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank,
583 frontend_layout, backend_layout);
584 const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank,
585 frontend_layout, backend_layout);
586 const auto shrink_axis_mask = acl_common::ReorderBits<int32_t>(
587 node.param().shrink_axis_mask, input_rank, frontend_layout, backend_layout);
589 ::arm_compute::Coordinates starts_set;
590 ::arm_compute::Coordinates ends_set;
591 ::arm_compute::BiStrides strides_set;
593 for (size_t i = 0; i < starts.size(); ++i)
595 starts_set.set(i, starts[i]);
596 ends_set.set(i, ends[i]);
597 strides_set.set(i, strides[i]);
600 auto fn = std::make_unique<::arm_compute::CLStridedSlice>();
602 fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
603 strides_set, begin_mask, end_mask, shrink_axis_mask);
605 auto acl_fn = asAclClFunction(std::move(fn));
607 _return_fn = std::move(acl_fn);
610 void KernelGenerator::visit(const ir::operation::Transpose &node)
612 const auto ofm_idx{node.getOutputs().at(0)};
613 const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
614 const auto &perm{node.param().perm};
616 const auto rank = _ctx.at(ifm_idx).shape().rank();
618 auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
619 auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
620 const auto frontend_layout = _current_op_seq_layout;
621 const auto backend_layout = ifm_tensor->layout();
623 std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
625 auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
626 rank, pv, frontend_layout, backend_layout);
628 auto fn = std::make_unique<::arm_compute::CLPermute>();
630 fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);
632 auto acl_fn = asAclClFunction(std::move(fn));
634 _return_fn = std::move(acl_fn);
637 void KernelGenerator::visit(const ir::operation::Add &node)
639 const auto ofm_index{node.getOutputs().at(0)};
640 const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
641 const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};
643 const auto activation = node.param().activation;
645 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
646 auto lhs_tensor = _tensor_builder->at(lhs_index).get();
647 auto rhs_tensor = _tensor_builder->at(rhs_index).get();
649 auto fn = std::make_unique<::arm_compute::CLArithmeticAddition>();
651 fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
652 arm_compute::ConvertPolicy::SATURATE);
654 _return_fn = std::make_unique<exec::FunctionSequence>(
655 asAclClFunction(std::move(fn)),
656 ActivationBuilder::generate(activation, ofm_tensor->handle()));
659 void KernelGenerator::visit(const ir::operation::Sub &node)
661 const auto ofm_index{node.getOutputs().at(0)};
662 const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
663 const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};
665 const auto activation = node.param().activation;
667 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
668 auto lhs_tensor = _tensor_builder->at(lhs_index).get();
669 auto rhs_tensor = _tensor_builder->at(rhs_index).get();
671 auto fn = std::make_unique<::arm_compute::CLArithmeticSubtraction>();
673 fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
674 arm_compute::ConvertPolicy::SATURATE);
676 _return_fn = std::make_unique<exec::FunctionSequence>(
677 asAclClFunction(std::move(fn)),
678 ActivationBuilder::generate(activation, ofm_tensor->handle()));
681 void KernelGenerator::visit(const ir::operation::Div &node)
683 const auto ofm_index{node.getOutputs().at(0)};
684 const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
685 const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};
687 const auto activation = node.param().activation;
689 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
690 auto lhs_tensor = _tensor_builder->at(lhs_index).get();
691 auto rhs_tensor = _tensor_builder->at(rhs_index).get();
693 auto fn = std::make_unique<::arm_compute::CLArithmeticDivision>();
695 fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
697 _return_fn = std::make_unique<exec::FunctionSequence>(
698 asAclClFunction(std::move(fn)),
699 ActivationBuilder::generate(activation, ofm_tensor->handle()));
702 void KernelGenerator::visit(const ir::operation::Exp &node)
704 const auto output_index{node.getOutputs().at(0)};
705 const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};
707 auto output_tensor = _tensor_builder->at(output_index).get();
708 auto input_tensor = _tensor_builder->at(input_index).get();
710 auto fn = std::make_unique<::arm_compute::CLExpLayer>();
712 fn->configure(input_tensor->handle(), output_tensor->handle());
714 auto acl_fn = asAclClFunction(std::move(fn));
716 _return_fn = std::move(acl_fn);
719 void KernelGenerator::visit(const ir::operation::ExpandDims &node)
721 const auto output_index{node.getOutputs().at(0)};
722 const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};
724 auto output_tensor = _tensor_builder->at(output_index).get();
725 auto input_tensor = _tensor_builder->at(input_index).get();
727 auto fn = std::make_unique<::arm_compute::CLReshapeLayer>();
729 fn->configure(input_tensor->handle(), output_tensor->handle());
731 auto acl_fn = asAclClFunction(std::move(fn));
733 _return_fn = std::move(acl_fn);
736 void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
738 const auto ofm_index{node.getOutputs().at(0)};
739 const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
740 const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
741 const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};
743 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
744 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
745 auto gamma_tensor = _tensor_builder->at(gamma_index).get();
746 auto beta_tensor = _tensor_builder->at(beta_index).get();
747 auto epsilon = node.param().epsilon;
748 auto activation = node.param().activation;
750 auto fn = std::make_unique<::arm_compute::CLInstanceNormalizationLayerEx>();
752 fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
753 beta_tensor->handle(), epsilon);
755 _return_fn = std::make_unique<exec::FunctionSequence>(
756 asAclClFunction(std::move(fn)),
757 ActivationBuilder::generate(activation, ofm_tensor->handle()));
760 void KernelGenerator::visit(const ir::operation::Logistic &node)
762 const auto ofm_index{node.getOutputs().at(0)};
763 const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};
765 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
766 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
768 const ::arm_compute::ActivationLayerInfo act_info{
769 ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};
771 auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
773 fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
775 auto acl_fn = asAclClFunction(std::move(fn));
777 _return_fn = std::move(acl_fn);
780 void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
782 const auto output_index{node.getOutputs().at(0)};
783 const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
784 const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};
786 auto output_tensor = _tensor_builder->at(output_index).get();
787 auto input0_tensor = _tensor_builder->at(input0_index).get();
788 auto input1_tensor = _tensor_builder->at(input1_index).get();
790 auto fn = std::make_unique<::arm_compute::CLBinaryLogicalOp>();
792 fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
793 ::arm_compute::BinaryLogicalOperation::AND);
795 auto acl_fn = asAclClFunction(std::move(fn));
797 _return_fn = std::move(acl_fn);
800 void KernelGenerator::visit(const ir::operation::LSTM &node)
802 _return_fn = acl_common::kernelGenLSTM<acl_common::AclClFunction, ::arm_compute::ICLTensor,
803 ::arm_compute::CLLSTMLayer>(node, _ctx, _tensor_builder);
806 void KernelGenerator::visit(const ir::operation::Comparison &node)
808 const auto output_index{node.getOutputs().at(0)};
809 const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
810 const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};
812 const auto comparison_type = node.param().comparison_type;
814 auto output_tensor = _tensor_builder->at(output_index).get();
815 auto input0_tensor = _tensor_builder->at(input0_index).get();
816 auto input1_tensor = _tensor_builder->at(input1_index).get();
818 auto fn = std::make_unique<::arm_compute::CLComparison>();
820 fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
821 (arm_compute::ComparisonOperation)comparison_type);
823 auto acl_fn = asAclClFunction(std::move(fn));
825 _return_fn = std::move(acl_fn);
828 void KernelGenerator::visit(const ir::operation::Pack &node)
830 const auto output_index{node.getOutputs().at(0)};
831 auto axis{node.param().axis};
833 const auto output_rank = _ctx.at(output_index).shape().rank();
835 std::vector<ir::OperandIndex> input_indexes;
836 for (const auto &input_index : node.getInputs())
837 input_indexes.emplace_back(input_index);
839 auto output = _tensor_builder->at(output_index).get()->handle();
840 std::vector<arm_compute::ICLTensor *> inputs;
841 for (const auto &input_index : input_indexes)
842 inputs.emplace_back(_tensor_builder->at(input_index)->handle());
844 const auto frontend_layout = _current_op_seq_layout;
845 const auto backend_layout = _tensor_builder->at(output_index).get()->layout();
849 axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();
851 auto fn = std::make_unique<::arm_compute::CLStackLayer>();
853 // Disable applied dim_correction
854 std::vector<arm_compute::TensorShape> orig_inputs_acl_tensor_shapes;
855 for (const auto &input_index : input_indexes)
857 size_t input_rank = _ctx.at(input_index).shape().rank();
858 const auto &input_tensor = _tensor_builder->at(input_index);
859 orig_inputs_acl_tensor_shapes.emplace_back(input_tensor->info()->tensor_shape());
860 assert(input_rank == input_tensor->num_dimensions());
861 if (input_rank != input_tensor->info()->num_dimensions())
863 // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
864 input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
865 _ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false));
869 fn->configure(inputs, axis, output);
871 // Revert disabling applied dim_correction
872 assert(inputs.size() == orig_inputs_acl_tensor_shapes.size());
873 for (size_t i = 0; i < inputs.size(); ++i)
875 inputs.at(i)->info()->set_tensor_shape(orig_inputs_acl_tensor_shapes.at(i));
878 _return_fn = asAclClFunction(std::move(fn));
881 void KernelGenerator::visit(const ir::operation::Permute &node)
883 const auto ofm_idx{node.getOutputs().at(0)};
884 const auto ifm_idx{node.getInputs().at(0)};
885 const auto permute_type = node.getPermuteType();
886 auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
887 auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
888 const auto rank = _ctx.at(ofm_idx).shape().rank();
889 assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());
891 std::unique_ptr<::arm_compute::IFunction> fn;
892 arm_compute::PermutationVector pv;
893 if (permute_type == ir::operation::Permute::Type::NCHW_TO_NHWC && rank == 4)
896 pv = arm_compute::PermutationVector{2, 0, 1};
898 auto l = std::make_unique<::arm_compute::CLPermute>();
900 l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
904 else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
907 pv = arm_compute::PermutationVector{1, 2, 0};
909 auto l = std::make_unique<::arm_compute::CLPermute>();
911 l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);
917 auto l = std::make_unique<::arm_compute::CLCopy>();
919 l->configure(ifm_tensor->handle(), ofm_tensor->handle());
924 auto acl_fn = asAclClFunction(std::move(fn));
926 _return_fn = std::move(acl_fn);
929 void KernelGenerator::visit(const ir::operation::RSQRT &node)
931 const auto ofm_index{node.getOutputs().at(0)};
932 const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};
934 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
935 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
937 auto fn = std::make_unique<::arm_compute::CLRsqrtLayer>();
939 fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
941 _return_fn = asAclClFunction(std::move(fn));
944 void KernelGenerator::visit(const ir::operation::ReLU &node)
946 const auto output_index{node.getOutputs().at(0)};
947 const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};
949 auto output_tensor = _tensor_builder->at(output_index).get();
950 auto input_tensor = _tensor_builder->at(input_index).get();
952 auto fn = std::make_unique<arm_compute::CLActivationLayer>();
954 const ::arm_compute::ActivationLayerInfo act_info{
955 ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};
957 fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
959 auto acl_fn = asAclClFunction(std::move(fn));
961 _return_fn = std::move(acl_fn);
964 void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
966 const auto ofm_index{node.getOutputs().at(0)};
968 const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};
970 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
971 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
973 auto fn = std::make_unique<::arm_compute::CLScale>();
975 fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
976 ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
977 ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);
979 auto acl_fn = asAclClFunction(std::move(fn));
981 _return_fn = std::move(acl_fn);
984 void KernelGenerator::visit(const ir::operation::ReLU1 &node)
986 const auto ofm_index{node.getOutputs().at(0)};
987 const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};
989 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
990 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
992 const ::arm_compute::ActivationLayerInfo act_info{
993 ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
995 auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
997 fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
999 auto acl_fn = asAclClFunction(std::move(fn));
1001 _return_fn = std::move(acl_fn);
1004 void KernelGenerator::visit(const ir::operation::ReLU6 &node)
1006 const auto ofm_index{node.getOutputs().at(0)};
1007 const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};
1009 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1010 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
1012 const ::arm_compute::ActivationLayerInfo act_info{
1013 ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};
1015 auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
1017 fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);
1019 auto acl_fn = asAclClFunction(std::move(fn));
1021 _return_fn = std::move(acl_fn);
1024 void KernelGenerator::visit(const ir::operation::RNN &node)
1026 const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
1027 const auto hidden_state_out_index{
1028 node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};
1030 const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
1031 const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
1032 const auto recurrent_weights_index{
1033 node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
1034 const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
1035 const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};
1037 const auto activation = node.param().activation;
1039 auto output_tensor = _tensor_builder->at(output_index).get();
1040 auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();
1042 auto input_tensor = _tensor_builder->at(input_index).get();
1043 auto weights_tensor = _tensor_builder->at(weights_index).get();
1044 auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
1045 auto bias_tensor = _tensor_builder->at(bias_index).get();
1046 auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
1047 auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);
1049 auto copy_layer = std::make_unique<::arm_compute::CLCopy>();
1050 copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
1051 _return_fn = asAclClFunction(std::move(copy_layer));
1053 auto fn = std::make_unique<::arm_compute::CLRNNLayer>(
1054 _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
1055 fn->configure(input_tensor->handle(), weights_tensor->handle(),
1056 recurrent_weights_tensor->handle(), bias_tensor->handle(),
1057 hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
1058 _return_fn = asAclClFunction(std::move(fn));
1061 void KernelGenerator::visit(const ir::operation::Floor &node)
1063 const auto ofm_index{node.getOutputs().at(0)};
1064 const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};
1066 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1067 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
1069 auto fn = std::make_unique<::arm_compute::CLFloor>();
1071 fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
1073 auto acl_fn = asAclClFunction(std::move(fn));
1075 _return_fn = std::move(acl_fn);
1078 void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
1080 const auto ofm_index{node.getOutputs().at(0)};
1081 const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
1082 const auto block_size_index{
1083 node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
1084 const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};
1086 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1087 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
1088 auto block_size_tensor = _tensor_builder->at(block_size_index).get();
1089 auto paddings_tensor = _tensor_builder->at(paddings_index).get();
1091 assert(_ctx.at(block_size_index).data());
1092 assert(_ctx.at(paddings_index).data());
1094 std::unique_ptr<::arm_compute::IFunction> fn;
1096 auto l = std::make_unique<::arm_compute::CLSpaceToBatchLayer>();
1097 l->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
1098 ofm_tensor->handle());
1101 auto acl_fn = asAclClFunction(std::move(fn));
1103 _return_fn = std::move(acl_fn);
1106 void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
1108 const auto ofm_index{node.getOutputs().at(0)};
1109 const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};
1111 auto block_size = node.param().block_size;
1113 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1114 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
1116 auto fn = std::make_unique<::arm_compute::CLSpaceToDepthLayer>();
1118 fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);
1120 auto acl_fn = asAclClFunction(std::move(fn));
1122 _return_fn = std::move(acl_fn);
1125 void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
1127 auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::CLPoolingLayer>(
1128 node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);
1130 const auto ofm_index{node.getOutputs().at(0)};
1131 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1132 const auto activation = node.param().activation;
1133 _return_fn = std::make_unique<exec::FunctionSequence>(
1134 asAclClFunction(std::move(raw_fn)),
1135 ActivationBuilder::generate(activation, ofm_tensor->handle()));
1138 void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
1140 const auto output_index{node.getOutputs().at(0)};
1141 const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
1142 const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};
1144 auto output_tensor = _tensor_builder->at(output_index).get();
1145 auto lookups_tensor = _tensor_builder->at(lookups_index).get();
1146 auto values_tensor = _tensor_builder->at(values_index).get();
1148 auto fn = std::make_unique<::arm_compute::CLEmbeddingLookup>();
1150 fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());
1152 auto acl_fn = asAclClFunction(std::move(fn));
1154 _return_fn = std::move(acl_fn);
1157 void KernelGenerator::visit(const ir::operation::L2Normalization &node)
1159 const auto ofm_index{node.getOutputs().at(0)};
1160 const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};
1162 // {CL|Neon}L2Normalization performs the reduction only along dimension 0
1163 // L2 Normalization always performs the reduction along the depth axis
1164 // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by
1165 // choosing normalization parameters as below
1167 const auto &ifm_shape = _ctx.at(ifm_index).shape();
1168 // TODO Support optional constant dimension that normalization would be performed on
1169 const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
1171 2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
1172 float alpha = 1.0f; // In the implementation to make alpha_ become 1
1173 float beta = 0.5f; // pow(reduction, -0.5) = 1 / sqrt(reduction)
1174 float bias = 0.0f; // Don't offset the reduction.
1176 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1177 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
1179 const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
1180 radius, alpha, beta, bias, false);
1182 auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
1184 fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
1186 auto acl_fn = asAclClFunction(std::move(fn));
1188 _return_fn = std::move(acl_fn);
1191 void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
1193 const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
1194 const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};
1196 const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
1197 const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
1198 const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};
1200 auto output_tensor = _tensor_builder->at(output_index).get();
1201 auto hits_tensor = _tensor_builder->at(hits_index).get();
1203 auto lookups_tensor = _tensor_builder->at(lookups_index).get();
1204 auto keys_tensor = _tensor_builder->at(keys_index).get();
1205 auto values_tensor = _tensor_builder->at(values_index).get();
1207 auto fn = std::make_unique<::arm_compute::CLHashtableLookup>();
1209 fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
1210 output_tensor->handle(), hits_tensor->handle());
1212 auto acl_fn = asAclClFunction(std::move(fn));
1214 _return_fn = std::move(acl_fn);
1217 void KernelGenerator::visit(const ir::operation::PReLU &node)
1219 const auto ofm_index{node.getOutputs().at(0)};
1220 const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
1221 const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};
1223 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1224 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
1225 auto alpha_tensor = _tensor_builder->at(alpha_index).get();
1227 auto fn = std::make_unique<::arm_compute::CLPReluLayer>();
1229 fn->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());
1231 auto acl_fn = asAclClFunction(std::move(fn));
1233 _return_fn = std::move(acl_fn);
1236 void KernelGenerator::visit(const ir::operation::TransposeConv &node)
1238 const auto ofm_index{node.getOutputs().at(0)};
1239 const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
1240 const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};
1242 const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
1243 const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
1244 const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);
1246 const auto stride = node.param().stride;
1248 assert((node.param().padding.type == ir::PaddingType::SAME) ||
1249 (node.param().padding.type == ir::PaddingType::VALID));
1250 auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
1251 ker_shape.W, ker_shape.H);
1252 uint32_t invalid_horizontal = 0;
1253 uint32_t invalid_vertical = 0;
1254 if (node.param().padding.type == ir::PaddingType::VALID)
1256 invalid_horizontal =
1257 ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
1258 invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
1261 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1262 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
1263 auto ker_tensor = _tensor_builder->at(ker_index).get();
1265 const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);
1267 auto fn = std::make_unique<::arm_compute::CLTransposeConvLayer>(
1268 _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
1270 fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
1271 tconv_info, invalid_horizontal, invalid_vertical);
1273 auto acl_fn = asAclClFunction(std::move(fn));
1275 _return_fn = std::move(acl_fn);
1278 void KernelGenerator::visit(const ir::operation::SQRT &node)
1280 const auto output_index{node.getOutputs().at(0)};
1281 const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};
1283 auto output_tensor = _tensor_builder->at(output_index).get();
1284 auto input_tensor = _tensor_builder->at(input_index).get();
1286 const ::arm_compute::ActivationLayerInfo act_info{
1287 ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};
1289 auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
1291 fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
1293 auto acl_fn = asAclClFunction(std::move(fn));
1295 _return_fn = std::move(acl_fn);
1298 void KernelGenerator::visit(const ir::operation::LogicalOr &node)
1300 const auto output_index{node.getOutputs().at(0)};
1301 const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
1302 const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};
1304 auto output_tensor = _tensor_builder->at(output_index).get();
1305 auto input0_tensor = _tensor_builder->at(input0_index).get();
1306 auto input1_tensor = _tensor_builder->at(input1_index).get();
1308 auto fn = std::make_unique<::arm_compute::CLBitwiseOr>();
1310 fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());
1312 auto acl_fn = asAclClFunction(std::move(fn));
1314 _return_fn = std::move(acl_fn);
1317 void KernelGenerator::visit(const ir::operation::LogicalNot &node)
1319 const auto output_index{node.getOutputs().at(0)};
1320 const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};
1322 auto output_tensor = _tensor_builder->at(output_index).get();
1323 auto input_tensor = _tensor_builder->at(input_index).get();
1325 auto fn = std::make_unique<::arm_compute::CLBitwiseNot>();
1327 fn->configure(input_tensor->handle(), output_tensor->handle());
1329 auto acl_fn = asAclClFunction(std::move(fn));
1331 _return_fn = std::move(acl_fn);
1334 void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
1336 const auto ofm_index{node.getOutputs().at(0)};
1337 const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
1338 const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};
1340 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1341 auto lhs_tensor = _tensor_builder->at(lhs_index).get();
1342 auto rhs_tensor = _tensor_builder->at(rhs_index).get();
1344 auto fn = std::make_unique<::arm_compute::CLElementwiseSquaredDiff>();
1346 fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
1348 auto acl_fn = asAclClFunction(std::move(fn));
1350 _return_fn = std::move(acl_fn);
1353 void KernelGenerator::visit(const ir::operation::TopKV2 &node)
1355 const auto outputValues_index{node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_VALUES)};
1356 const auto outputIndices_index{
1357 node.getOutputs().at(ir::operation::TopKV2::Output::OUTPUT_INDICES)};
1359 const auto inputData_index{node.getInputs().at(ir::operation::TopKV2::Input::INPUT)};
1361 // Currently, we only support the vector input.
1362 assert(_ctx.at(inputData_index).shape().rank() == 1 ||
1363 _ctx.at(inputData_index).shape().rank() == 2);
1365 const auto k = node.param().k;
1367 auto values_tensor = _tensor_builder->at(outputValues_index).get();
1368 auto indices_tensor = _tensor_builder->at(outputIndices_index).get();
1369 auto input_tensor = _tensor_builder->at(inputData_index).get();
1371 auto fn = std::make_unique<::arm_compute::CLTopKV2>();
1373 fn->configure(input_tensor->handle(), k, values_tensor->handle(), indices_tensor->handle());
1375 auto acl_fn = asAclClFunction(std::move(fn));
1377 _return_fn = std::move(acl_fn);
1380 void KernelGenerator::visit(const ir::operation::Gather &node)
1382 const auto ofm_index{node.getOutputs().at(0)};
1384 const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
1385 const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};
1387 const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
1388 const auto axis_raw = node.param().axis;
1389 const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
1390 const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
1392 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1393 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
1394 auto indices_tensor = _tensor_builder->at(indices_index).get();
1396 // NOTE The frontend layout and backend layout must be the same for this operation.
1397 // If not the same, we have to add a stage(?) to perform permutation of output tensor. It
1398 // is not not efficient even if it works well. If so, it would be better to set the
1399 // layout of these backend tensors to the same layout.
1400 // There is also one thing we have to think about. This operation depends on the layout of
1401 // a model. For example, if a model in NHWC has this operation as output rank == 4, indices
1402 // rank == 2 and axis == 2, this operation should work as the axis W and C, but the axis W
1403 // and C are not sequential in NCHW. So the backend in NCHW cannot handle this case.
1404 const auto backend_layout = ofm_tensor->layout();
1405 UNUSED_RELEASE(backend_layout);
1406 assert(backend_layout == ifm_tensor->layout());
1407 assert(backend_layout == indices_tensor->layout());
1408 assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);
1410 auto fn = std::make_unique<::arm_compute::CLGatherEx>();
1412 // input is n-D, indices k-D, output is (n + k - 1)-D
1413 size_t n = ifm_rank;
1414 assert(n == ifm_tensor->num_dimensions());
1415 size_t k = _ctx.at(indices_index).shape().rank();
1416 assert(k == indices_tensor->num_dimensions());
1418 // Disable applied dim_correction
1419 const auto orig_ifm_acl_tensor_shape = ifm_tensor->info()->tensor_shape();
1420 if (n != ifm_tensor->info()->num_dimensions())
1422 // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
1423 const auto ifm = _ctx.at(ifm_index);
1424 ifm_tensor->info()->set_tensor_shape(
1425 acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false));
1427 const auto orig_indice_acl_tensor_shape = indices_tensor->info()->tensor_shape();
1428 if (k != indices_tensor->info()->num_dimensions())
1430 // This means that high dimension's value is 1 and indices tensor is applied dim_correction
1431 const auto indices = _ctx.at(indices_index);
1432 indices_tensor->info()->set_tensor_shape(
1433 acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
1436 fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);
1438 // Revert disabling applied dim_correction
1439 ifm_tensor->info()->set_tensor_shape(orig_ifm_acl_tensor_shape);
1440 indices_tensor->info()->set_tensor_shape(orig_indice_acl_tensor_shape);
1442 auto acl_fn = asAclClFunction(std::move(fn));
1444 _return_fn = std::move(acl_fn);
1447 void KernelGenerator::visit(const ir::operation::Neg &node)
1449 const auto ofm_index{node.getOutputs().at(0)};
1450 const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};
1452 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1453 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
1455 auto fn = std::make_unique<::arm_compute::CLNeg>();
1457 fn->configure(ifm_tensor->handle(), ofm_tensor->handle());
1459 auto acl_fn = asAclClFunction(std::move(fn));
1461 _return_fn = std::move(acl_fn);
1464 void KernelGenerator::visit(const ir::operation::Abs &node)
1466 const auto output_index{node.getOutputs().at(0)};
1467 const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};
1469 auto output_tensor = _tensor_builder->at(output_index).get();
1470 auto input_tensor = _tensor_builder->at(input_index).get();
1472 const ::arm_compute::ActivationLayerInfo act_info{
1473 ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};
1475 auto fn = std::make_unique<::arm_compute::CLActivationLayer>();
1477 fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);
1479 auto acl_fn = asAclClFunction(std::move(fn));
1481 _return_fn = std::move(acl_fn);
1484 void KernelGenerator::visit(const ir::operation::ArgMax &node)
1486 const auto ofm_index{node.getOutputs().at(0)};
1487 const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};
1489 auto ifm_shape = _ctx.at(ifm_index).shape();
1490 auto ofm_shape = _ctx.at(ofm_index).shape();
1492 assert((ifm_shape.rank() - 1) == ofm_shape.rank());
1494 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1495 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
1496 const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
1497 auto frontend_layout = _current_op_seq_layout;
1498 auto backend_layout = ifm_tensor->layout();
1500 int axis_value = node.param().axis;
1503 axis_value += ifm_rank;
1507 acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
1509 auto fn = std::make_unique<::arm_compute::CLArgMinMaxLayer>();
1511 fn->configure(ifm_tensor->handle(), acl_axis, ofm_tensor->handle(),
1512 ::arm_compute::ReductionOperation::ARG_IDX_MAX);
1514 auto acl_fn = asAclClFunction(std::move(fn));
1516 _return_fn = std::move(acl_fn);
1519 void KernelGenerator::visit(const ir::operation::Dequantize &node)
1521 const auto output_index{node.getOutputs().at(0)};
1522 const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};
1524 auto output_tensor = _tensor_builder->at(output_index).get();
1525 auto input_tensor = _tensor_builder->at(input_index).get();
1527 auto fn = std::make_unique<::arm_compute::CLDequantizationLayer>();
1529 fn->configure(input_tensor->handle(), output_tensor->handle());
1531 auto acl_fn = asAclClFunction(std::move(fn));
1533 _return_fn = std::move(acl_fn);
1536 void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
1538 const auto ofm_index{node.getOutputs().at(0)};
1539 const auto ifm_index{
1540 node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};
1542 auto radius = node.param().radius;
1543 auto alpha = node.param().alpha;
1544 auto beta = node.param().beta;
1545 auto bias = node.param().bias;
1547 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1548 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
1550 const auto norm_info = ::arm_compute::NormalizationLayerInfo(
1551 ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);
1553 auto fn = std::make_unique<::arm_compute::CLNormalizationLayer>();
1555 fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);
1557 auto acl_fn = asAclClFunction(std::move(fn));
1559 _return_fn = std::move(acl_fn);
1562 void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
1564 const auto output_index{node.getOutputs().at(0)};
1565 const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};
1567 auto block_size = node.param().block_size;
1568 assert(block_size > 0);
1570 auto output_tensor = _tensor_builder->at(output_index).get();
1571 auto input_tensor = _tensor_builder->at(input_index).get();
1573 auto fn = std::make_unique<::arm_compute::CLDepthToSpaceLayer>();
1575 fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);
1577 auto acl_fn = asAclClFunction(std::move(fn));
1579 _return_fn = std::move(acl_fn);
1582 void KernelGenerator::visit(const ir::operation::Split &node)
1584 const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};
1586 assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));
1588 const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
1589 std::vector<ir::OperandIndex> output_indexes;
1590 for (const auto &output : node.getOutputs())
1591 output_indexes.emplace_back(output);
1593 auto ifm_tensor = _tensor_builder->at(ifm_index).get();
1594 std::vector<arm_compute::ICLTensor *> output_tensors;
1595 for (const auto &ofm_ind : output_indexes)
1596 output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());
1598 const auto frontend_layout = _current_op_seq_layout;
1599 const auto backend_layout = ifm_tensor->layout();
1600 auto axis = node.param().axis;
1603 axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();
1605 auto fn = std::make_unique<::arm_compute::CLSplit>();
1607 fn->configure(ifm_tensor->handle(), output_tensors, axis);
1609 _return_fn = asAclClFunction(std::move(fn));
1612 void KernelGenerator::visit(const ir::operation::Unpack &node)
1614 const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
1615 auto axis{node.param().axis};
1617 const auto input_rank = _ctx.at(input_index).shape().rank();
1619 std::vector<ir::OperandIndex> output_indexes;
1620 for (const auto &output_index : node.getOutputs())
1621 output_indexes.emplace_back(output_index);
1623 auto input = _tensor_builder->at(input_index).get()->handle();
1624 std::vector<arm_compute::ICLTensor *> outputs;
1625 for (const auto &output_index : output_indexes)
1626 outputs.emplace_back(_tensor_builder->at(output_index)->handle());
1628 const auto frontend_layout = _current_op_seq_layout;
1629 const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
1632 axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();
1634 // Disable applied dim_correction
1635 std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
1636 for (const auto &output_index : output_indexes)
1638 size_t output_rank = _ctx.at(output_index).shape().rank();
1639 const auto &output_tensor = _tensor_builder->at(output_index);
1640 orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
1641 assert(output_rank == output_tensor->num_dimensions());
1642 if (output_rank != output_tensor->info()->num_dimensions())
1644 // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
1645 output_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
1646 _ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false));
1650 auto fn = std::make_unique<::arm_compute::CLUnstack>();
1652 fn->configure(input, outputs, axis);
1654 _return_fn = asAclClFunction(std::move(fn));
// Generates a CLPadLayer kernel for the Pad operation.
// Pads the input with a constant value; for quantized types the constant 0 is
// interpreted through the input's quantization info (i.e. padded with zero-point).
void KernelGenerator::visit(const ir::operation::Pad &node)
{
  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
  const auto output_index{node.getOutputs().at(0)};
  // The paddings operand must be constant data available at kernel-generation time
  assert(_ctx.at(pad_index).data());

  auto rank = _ctx.at(input_index).shape().rank();
  auto pad_base = _ctx.at(pad_index).data()->base();

  auto input_type = _ctx.at(input_index).typeInfo();
  auto data_type = acl_common::asDataType(input_type.type());
  auto quant_info = ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset());
  // Padding value 0 carried with the input's quantization info so quantized
  // tensors are padded with their zero-point rather than raw 0
  const auto pixel_value = ::arm_compute::PixelValue(0, data_type, quant_info);

  auto input = _tensor_builder->at(input_index).get()->handle();
  auto output = _tensor_builder->at(output_index).get()->handle();

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = _tensor_builder->at(input_index).get()->layout();

  // The pad operand stores two int32 values (before, after) per frontend axis;
  // remap each frontend axis to the corresponding ACL backend axis
  ::arm_compute::PaddingList padding_list;
  padding_list.resize(rank);
  for (int32_t n = 0; n < rank; ++n)
  {
    const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);

    const auto axis =
        acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
    padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
  }
  auto fn = std::make_unique<::arm_compute::CLPadLayer>();

  // Disable applied dim_correction
  size_t input_rank = _ctx.at(input_index).shape().rank();
  const auto &input_tensor = _tensor_builder->at(input_index);
  assert(input_rank == input_tensor->num_dimensions());
  if (input_rank != input_tensor->info()->num_dimensions())
  {
    // This means that high dimension's value is 1 and ifm tensor is applied dim_correction
    input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
        _ctx.at(input_index).shape(), frontend_layout, backend_layout, false));
  }

  fn->configure(input, output, padding_list, pixel_value);

  // Do not revert disabling applied dim_correction CLPadKernel has cl kernel for 4-dimension
  // It would produce a mismatch of result

  _return_fn = asAclClFunction(std::move(fn));
}
1709 void KernelGenerator::visit(const ir::operation::Min &node)
1711 const auto ofm_index{node.getOutputs().at(0)};
1712 const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
1713 const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};
1715 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1716 auto lhs_tensor = _tensor_builder->at(lhs_index).get();
1717 auto rhs_tensor = _tensor_builder->at(rhs_index).get();
1719 auto fn = std::make_unique<::arm_compute::CLElementwiseMin>();
1721 fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
1723 auto acl_fn = asAclClFunction(std::move(fn));
1725 _return_fn = std::move(acl_fn);
1728 void KernelGenerator::visit(const ir::operation::Max &node)
1730 const auto ofm_index{node.getOutputs().at(0)};
1731 const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
1732 const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};
1734 auto ofm_tensor = _tensor_builder->at(ofm_index).get();
1735 auto lhs_tensor = _tensor_builder->at(lhs_index).get();
1736 auto rhs_tensor = _tensor_builder->at(rhs_index).get();
1738 auto fn = std::make_unique<::arm_compute::CLElementwiseMax>();
1740 fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());
1742 auto acl_fn = asAclClFunction(std::move(fn));
1744 _return_fn = std::move(acl_fn);
// Generates a CLDepthConvertLayer kernel converting the input tensor
// from fp32 to fp16 with saturating conversion.
void KernelGenerator::visit(const ir::operation::ConvertFp32ToFp16 &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp32ToFp16::Input::INPUT)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();

  // shift = 0: plain type conversion, no fixed-point bit shift
  // NOTE(review): trailing shift argument reconstructed from truncated line — confirm upstream
  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
                0);

  auto acl_fn = asAclClFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
// Generates a CLDepthConvertLayer kernel converting the input tensor
// from fp16 to fp32 with saturating conversion.
void KernelGenerator::visit(const ir::operation::ConvertFp16ToFp32 &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::ConvertFp16ToFp32::Input::INPUT)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  auto fn = std::make_unique<::arm_compute::CLDepthConvertLayer>();

  // shift = 0: plain type conversion, no fixed-point bit shift
  // NOTE(review): trailing shift argument reconstructed from truncated line — confirm upstream
  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), ::arm_compute::ConvertPolicy::SATURATE,
                0);

  auto acl_fn = asAclClFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
1783 } // namespace acl_cl
1784 } // namespace backend
1785 } // namespace onert