/*
 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "KernelGenerator.h"

#include <arm_compute/runtime/NEON/NEFunctions.h>   // Include all ARM Compute NEON functions
#include <arm_compute/runtime/NEON/NEFunctionsEx.h> // Include all ARM Compute EX NEON functions
#include <arm_compute/runtime/CPP/functions/CPPOneHotEx.h>
#include <AclActivationBuilder.h>
#include <AclFunction.h>
#include <Convert.h>
#include <Swizzle.h>
#include "ir/DataType.h"
#include "ir/InternalType.h"
#include "exec/NopFunction.h"
#include "util/logging.h"
#include "util/Utils.h"
#include "AclKernelGen.h"
namespace onert
{
namespace backend
{
namespace acl_neon
{

using ::onert::backend::acl_common::asAclFunction;
using ActivationBuilder = ::onert::backend::acl_common::AclActivationBuilder<
    ::arm_compute::ITensor, ::arm_compute::NEActivationLayer, acl_common::AclFunction>;
KernelGenerator::KernelGenerator(const ir::Operands &operands_ctx,
                                 const ir::Operations &operations_ctx,
                                 const std::shared_ptr<TensorBuilder> &tensor_builder)
    : _ctx(operands_ctx), _operations_ctx(operations_ctx), _tensor_builder(tensor_builder),
      _current_op_seq_layout(ir::Layout::UNKNOWN)
{
  // DO NOTHING
}
void KernelGenerator::visit(const ir::OpSequence &op_seq)
{
  // TODO Move this to IKernelGenerator
  //      (all derivatives have the same implementation for this)
  assert(!_return_fn_seq);
  _return_fn_seq = std::make_unique<exec::FunctionSequence>();
  _return_fn_seq->enableDynamicShapeInferer(false);

  _current_op_seq_layout = op_seq.getLayout();
  for (const auto &operation_idx : op_seq.operations())
  {
    const auto &node = _operations_ctx.at(operation_idx);
    node.accept(*this);
    _return_fn_seq->append(releaseFunction());
  }
}
void KernelGenerator::visit(const ir::operation::Abs &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Abs::Input::INPUT)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();

  const ::arm_compute::ActivationLayerInfo act_info{
      ::arm_compute::ActivationLayerInfo::ActivationFunction::ABS};

  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();

  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::ArgMax &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::ArgMax::Input::INPUT)};

  const auto ifm_rank = _ctx.at(ifm_index).shape().rank();

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
  auto frontend_layout = _current_op_seq_layout;
  auto backend_layout = ifm_tensor->layout();

  int axis_value = node.param().axis;
  if (axis_value < 0)
  {
    axis_value += ifm_rank;
  }
  assert(axis_value >= 0 && axis_value < ifm_rank);
  const auto fixed_axis =
      acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();

  auto fn = std::make_unique<::arm_compute::NEArgMinMaxLayer>();

  fn->configure(ifm_tensor->handle(), fixed_axis, ofm_tensor->handle(),
                arm_compute::ReductionOperation::ARG_IDX_MAX);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::BatchToSpaceND &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::BatchToSpaceND::Input::INPUT)};
  const auto block_size_index{
      node.getInputs().at(ir::operation::BatchToSpaceND::Input::BLOCK_SIZE)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
  auto block_size_tensor = _tensor_builder->at(block_size_index).get();

  assert(_ctx.at(block_size_index).data());

  auto fn = std::make_unique<::arm_compute::NEBatchToSpaceLayer>();

  fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), ofm_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::Cast &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Cast::Input::INPUT)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  std::unique_ptr<::arm_compute::IFunction> fn;
  if (ifm_tensor->data_type() == ofm_tensor->data_type())
  {
    auto l = std::make_unique<::arm_compute::NECopy>();

    l->configure(ifm_tensor->handle(), ofm_tensor->handle());

    fn = std::move(l);
  }
  else
  {
    auto l = std::make_unique<::arm_compute::NECast>();

    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), arm_compute::ConvertPolicy::SATURATE);

    fn = std::move(l);
  }

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::Conv2D &node)
{
  using ir::operation::Conv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(Conv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(Conv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(Conv2D::Input::BIAS)};

  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
  // Kernel format is [depth_out, kernel_height, kernel_width, depth_in].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);

  const auto stride = node.param().stride;
  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
                                            ker_width, ker_height);
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
  auto ker_tensor = _tensor_builder->at(ker_index).get();
  auto bias_tensor = _tensor_builder->at(bias_index).get();

  const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
  const auto act_info = acl_common::asActivationLayerInfo(activation);

  auto fn = std::make_unique<::arm_compute::NEConvolutionLayer>(
      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
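  // Note: the default-constructed ::arm_compute::WeightsInfo() below passes no
  // pre-reshaped-weights metadata, and ::arm_compute::Size2D(1U, 1U) is the dilation (i.e. none).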
  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
                ofm_tensor->handle(), conv_info, ::arm_compute::WeightsInfo(),
                ::arm_compute::Size2D(1U, 1U), act_info);

  _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::DepthToSpace &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::DepthToSpace::Input::INPUT)};

  auto block_size = node.param().block_size;
  assert(block_size > 0);

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();

  auto fn = std::make_unique<::arm_compute::NEDepthToSpaceLayer>();

  fn->configure(input_tensor->handle(), output_tensor->handle(), block_size);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
{
  using ir::operation::DepthwiseConv2D;

  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(DepthwiseConv2D::Input::INPUT)};
  const auto ker_index{node.getInputs().at(DepthwiseConv2D::Input::KERNEL)};
  const auto bias_index{node.getInputs().at(DepthwiseConv2D::Input::BIAS)};

  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
  // Kernel format is [1, kernel_height, kernel_width, depth_out].
  const auto &ker_shape = _ctx.at(ker_index).shape();
  const auto ker_height = ker_shape.dim(1);
  const auto ker_width = ker_shape.dim(2);

  const auto stride = node.param().stride;
  const auto padding = ir::calculatePadding(node.param().padding, ifm_shape, ofm_shape, stride,
                                            ker_width, ker_height);
  const auto multiplier = node.param().multiplier;
  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
  auto ker_tensor = _tensor_builder->at(ker_index).get();
  auto bias_tensor = _tensor_builder->at(bias_index).get();

  const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
  const auto act_info = acl_common::asActivationLayerInfo(activation);

  auto fn = std::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();

  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), bias_tensor->handle(),
                ofm_tensor->handle(), conv_info, multiplier, act_info);

  _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Dequantize &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Dequantize::Input::INPUT)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();

  auto fn = std::make_unique<::arm_compute::NEDequantizationLayer>();

  fn->configure(input_tensor->handle(), output_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
{
  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::MAX);

  const auto ofm_index{node.getOutputs().at(0)};
  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  const auto activation = node.param().activation;
  _return_fn = std::make_unique<exec::FunctionSequence>(
      asAclFunction(std::move(raw_fn)),
      ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::AvgPool2D &node)
{
  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::AVG);

  const auto ofm_index{node.getOutputs().at(0)};
  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  const auto activation = node.param().activation;
  _return_fn = std::make_unique<exec::FunctionSequence>(
      asAclFunction(std::move(raw_fn)),
      ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Concat &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  std::vector<ir::OperandIndex> input_indexes;
  for (const auto &input : node.getInputs())
    input_indexes.emplace_back(input);

  const auto axis = node.param().axis;

  // Concat elimination check
  bool eliminated = _tensor_builder->areSubTensorsOf(ofm_index, node.getInputs());
  if (eliminated)
  {
    // If concat is eliminated, return a NOP IFunction
    VERBOSE(acl_neon_KernelGenerator_Concat) << "Concat eliminated" << std::endl;
    _return_fn = std::make_unique<exec::NopFunction>();
    return;
  }

  auto output_tensor = _tensor_builder->at(ofm_index).get();
  std::vector<::arm_compute::ITensor *> input_tensors;
  for (const auto &ifm_ind : input_indexes)
    input_tensors.emplace_back(_tensor_builder->at(ifm_ind)->handle());

  std::unique_ptr<::arm_compute::IFunction> fn;
  if (input_indexes.size() < 2)
  {
    auto l = std::make_unique<::arm_compute::NECopy>();
    l->configure(input_tensors.at(0), output_tensor->handle());
    fn = std::move(l);
  }
  else
  {
    auto l = std::make_unique<::arm_compute::NEConcatenateLayer>();
    const auto rank = _ctx.at(ofm_index).shape().rank();
    const auto frontend_layout = _current_op_seq_layout;
    const auto backend_layout = output_tensor->layout();
    const auto fixed_axis =
        acl_common::ToARMComputeAxis(rank, axis, frontend_layout, backend_layout).value();
    l->configure(input_tensors, output_tensor->handle(), fixed_axis);
    fn = std::move(l);
  }

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::EmbeddingLookup &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto lookups_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::LOOKUPS)};
  const auto values_index{node.getInputs().at(ir::operation::EmbeddingLookup::Input::VALUES)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
  auto values_tensor = _tensor_builder->at(values_index).get();

  auto fn = std::make_unique<::arm_compute::NEEmbeddingLookup>();

  fn->configure(values_tensor->handle(), output_tensor->handle(), lookups_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::Floor &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Floor::Input::INPUT)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  auto fn = std::make_unique<::arm_compute::NEFloor>();

  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::FullyConnected &node)
{
  const auto output_index{node.getOutputs().at(0)};
  auto output_tensor = _tensor_builder->at(output_index).get();
  const auto activation = node.param().activation;

  auto fn = acl_common::kernelGenFullyConnected<acl_common::AclFunction, ::arm_compute::ITensor,
                                                ::arm_compute::NEFullyConnectedReshapingLayer>(
      node, _ctx, _tensor_builder, _current_op_seq_layout);
  _return_fn = std::make_unique<exec::FunctionSequence>(
      std::move(fn), ActivationBuilder::generate(activation, output_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::HashtableLookup &node)
{
  const auto output_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::OUTPUT)};
  const auto hits_index{node.getOutputs().at(ir::operation::HashtableLookup::Output::HITS)};

  const auto lookups_index{node.getInputs().at(ir::operation::HashtableLookup::Input::LOOKUPS)};
  const auto keys_index{node.getInputs().at(ir::operation::HashtableLookup::Input::KEYS)};
  const auto values_index{node.getInputs().at(ir::operation::HashtableLookup::Input::VALUES)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto hits_tensor = _tensor_builder->at(hits_index).get();

  auto lookups_tensor = _tensor_builder->at(lookups_index).get();
  auto keys_tensor = _tensor_builder->at(keys_index).get();
  auto values_tensor = _tensor_builder->at(values_index).get();

  auto fn = std::make_unique<::arm_compute::NEHashtableLookup>();

  fn->configure(lookups_tensor->handle(), keys_tensor->handle(), values_tensor->handle(),
                output_tensor->handle(), hits_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::Gather &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto ifm_index{node.getInputs().at(ir::operation::Gather::Input::INPUT)};
  const auto indices_index{node.getInputs().at(ir::operation::Gather::Input::INDICES)};

  const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
  const auto axis_raw = node.param().axis;
  const auto axis_value = (axis_raw < 0 ? (ifm_rank + axis_raw) : axis_raw);
  // Converting in reverse order
  const int axis = ::onert::backend::acl_common::ToARMComputeAxis(ifm_rank, axis_value).value();
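  // e.g. when the frontend and backend layouts match, ToARMComputeAxis maps frontend axis i of a
  // rank-r tensor to ACL axis (r - 1 - i), since ACL orders dimensions fastest-varying first.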
  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
  auto indices_tensor = _tensor_builder->at(indices_index).get();
  const auto backend_layout = ofm_tensor->layout();
  UNUSED_RELEASE(backend_layout);

  // NOTE The frontend layout and backend layout must be the same for this operation.
  //      If they differ, we would have to add a stage to permute the output tensor, which
  //      is not efficient even if it works. In that case it would be better to set the
  //      layout of these backend tensors to the same layout.
  //      There is also one more thing to consider. This operation depends on the layout of
  //      the model. For example, if an NHWC model has this operation with output rank == 4,
  //      indices rank == 2 and axis == 2, this operation should work on the W and C axes,
  //      but W and C are not sequential in NCHW. So an NCHW backend cannot handle this case.
  assert(backend_layout == ifm_tensor->layout());
  assert(backend_layout == indices_tensor->layout());
  assert(ifm_rank < 4 || _current_op_seq_layout == backend_layout);

  auto fn = std::make_unique<::arm_compute::NEGatherEx>();

  // input is n-D, indices k-D, output is (n + k - 1)-D
  size_t n = ifm_rank;
  assert(n == ifm_tensor->num_dimensions());
  size_t k = _ctx.at(indices_index).shape().rank();
  assert(k == indices_tensor->num_dimensions());

  // Disable applied dim_correction
  if (n != ifm_tensor->info()->num_dimensions())
  {
    // This means that a high dimension's value is 1 and dim_correction is applied to the ifm tensor
    const auto ifm = _ctx.at(ifm_index);
    ifm_tensor->info()->set_tensor_shape(
        acl_common::asTensorShape(ifm.shape(), _current_op_seq_layout, backend_layout, false));
  }
  if (k != indices_tensor->info()->num_dimensions())
  {
    // This means that a high dimension's value is 1 and dim_correction is applied to the indices
    // tensor
    const auto indices = _ctx.at(indices_index);
    indices_tensor->info()->set_tensor_shape(
        acl_common::asTensorShape(indices.shape(), _current_op_seq_layout, backend_layout, false));
  }

  fn->configure(ifm_tensor->handle(), indices_tensor->handle(), ofm_tensor->handle(), axis);

  // acl_neon does not revert the disabled dim_correction here because acl_neon's kernels rely on
  // arm_compute::TensorInfo::offset_element_in_bytes(), and reverting would cause an error when a
  // kernel accesses a high dimension whose value is 1.

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::InstanceNorm &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::InstanceNorm::Input::INPUT)};
  const auto gamma_index{node.getInputs().at(ir::operation::InstanceNorm::Input::GAMMA)};
  const auto beta_index{node.getInputs().at(ir::operation::InstanceNorm::Input::BETA)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
  auto gamma_tensor = _tensor_builder->at(gamma_index).get();
  auto beta_tensor = _tensor_builder->at(beta_index).get();
  auto epsilon = node.param().epsilon;
  auto activation = node.param().activation;

  auto fn = std::make_unique<::arm_compute::NEInstanceNormalizationLayerEx>();

  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), gamma_tensor->handle(),
                beta_tensor->handle(), epsilon);

  _return_fn = std::make_unique<exec::FunctionSequence>(
      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::L2Normalization &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::L2Normalization::Input::INPUT)};

  // {CL|Neon}L2Normalization performs the reduction only along dimension 0,
  // while L2 Normalization always performs the reduction along the depth axis.
  // Thus, we repurpose {CL|Neon}NormalizationLayers to act as depthwise L2 normalizations by
  // choosing normalization parameters as below.

  const auto &ifm_shape = _ctx.at(ifm_index).shape();
  // TODO Support optional constant dimension that normalization would be performed on
  const auto normalization_axis = _ctx.at(ifm_index).shape().rank() - 1;
  int32_t radius =
      2 * ifm_shape.dim(normalization_axis) + 1; // normSize = depth(last dimension) * 2 + 1
  float alpha = 1.0f;                            // In the implementation to make alpha_ become 1
  float beta = 0.5f;                             // pow(reduction, -0.5) = 1 / sqrt(reduction)
  float bias = 0.0f;                             // Don't offset the reduction.
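  // With is_scaled = false (the last constructor argument below), CROSS_MAP normalization
  // computes x / (bias + alpha * sum(x^2))^beta over the window, which for alpha = 1,
  // beta = 0.5 and bias = 0 reduces to x / sqrt(sum(x^2)): an L2 normalization along depth.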
  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  const auto norm_info = ::arm_compute::NormalizationLayerInfo(::arm_compute::NormType::CROSS_MAP,
                                                               radius, alpha, beta, bias, false);

  auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();

  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::L2Pool2D &node)
{
  auto raw_fn = acl_common::kernelGenPool2D<::arm_compute::NEPoolingLayer>(
      node, _ctx, _tensor_builder, _current_op_seq_layout, ::arm_compute::PoolingType::L2);

  const auto ofm_index{node.getOutputs().at(0)};
  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  const auto activation = node.param().activation;
  _return_fn = std::make_unique<exec::FunctionSequence>(
      asAclFunction(std::move(raw_fn)),
      ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::LocalResponseNormalization &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{
      node.getInputs().at(ir::operation::LocalResponseNormalization::Input::INPUT)};

  auto radius = node.param().radius;
  auto alpha = node.param().alpha;
  auto beta = node.param().beta;
  auto bias = node.param().bias;

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  const auto norm_info = ::arm_compute::NormalizationLayerInfo(
      ::arm_compute::NormType::CROSS_MAP, radius * 2 + 1, alpha, beta, bias, false);

  auto fn = std::make_unique<::arm_compute::NENormalizationLayer>();

  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), norm_info);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::LogicalAnd &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input0_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT0)};
  const auto input1_index{node.getInputs().at(ir::operation::LogicalAnd::Input::INPUT1)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input0_tensor = _tensor_builder->at(input0_index).get();
  auto input1_tensor = _tensor_builder->at(input1_index).get();

  auto fn = std::make_unique<::arm_compute::NELogicalAnd>();

  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::LogicalNot &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::LogicalNot::Input::INPUT)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();

  auto fn = std::make_unique<::arm_compute::NEBitwiseNot>();

  fn->configure(input_tensor->handle(), output_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::LogicalOr &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input0_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT0)};
  const auto input1_index{node.getInputs().at(ir::operation::LogicalOr::Input::INPUT1)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input0_tensor = _tensor_builder->at(input0_index).get();
  auto input1_tensor = _tensor_builder->at(input1_index).get();

  auto fn = std::make_unique<::arm_compute::NELogicalOr>();

  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::Logistic &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Logistic::Input::INPUT)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  const ::arm_compute::ActivationLayerInfo act_info{
      ::arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC};

  // NOTE NEActivationLayer can produce erroneous results. This is caused by 'vexpq_f32()':
  //      for a value outside the representable range of float, the NEON function returns 'NaN'
  //      instead of 'INF', and the result of this op then becomes erroneous due to the 'NaN'.
  auto fn = std::make_unique<::arm_compute::NEActivationLayerEx>();

  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::LSTM &node)
{
  _return_fn = acl_common::kernelGenLSTM<acl_common::AclFunction, ::arm_compute::ITensor,
                                         ::arm_compute::NELSTMLayer>(node, _ctx, _tensor_builder);
}
void KernelGenerator::visit(const ir::operation::Mul &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Mul::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Mul::Input::RHS)};

  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
  auto rhs_tensor = _tensor_builder->at(rhs_index).get();

  auto fn = std::make_unique<::arm_compute::NEPixelWiseMultiplication>();

  // For a scale of 1.0, only RoundingPolicy::TO_ZERO is allowed
  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(), 1.0, // scale
                arm_compute::ConvertPolicy::SATURATE, arm_compute::RoundingPolicy::TO_ZERO);

  _return_fn = std::make_unique<exec::FunctionSequence>(
      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Neg &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::Neg::Input::INPUT)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  auto fn = std::make_unique<::arm_compute::NENegLayer>();

  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::Pack &node)
{
  const auto output_index{node.getOutputs().at(0)};
  auto axis{node.param().axis};

  const auto output_rank = _ctx.at(output_index).shape().rank();

  std::vector<ir::OperandIndex> input_indexes;
  for (const auto &input_index : node.getInputs())
    input_indexes.emplace_back(input_index);

  auto output = _tensor_builder->at(output_index).get()->handle();
  std::vector<arm_compute::ITensor *> inputs;
  for (const auto &input_index : input_indexes)
    inputs.emplace_back(_tensor_builder->at(input_index)->handle());

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = _tensor_builder->at(output_index).get()->layout();

  if (axis < 0)
    axis += output_rank;
  axis = acl_common::ToARMComputeAxis(output_rank, axis, frontend_layout, backend_layout).value();

  auto fn = std::make_unique<::arm_compute::NEStackLayer>();

  // Disable applied dim_correction
  for (const auto &input_index : input_indexes)
  {
    size_t input_rank = _ctx.at(input_index).shape().rank();
    const auto &input_tensor = _tensor_builder->at(input_index);
    assert(input_rank == input_tensor->num_dimensions());
    if (input_rank != input_tensor->info()->num_dimensions())
    {
      // This means that a high dimension's value is 1 and dim_correction is applied to the input
      // tensor
      input_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
          _ctx.at(input_index).shape(), _current_op_seq_layout, backend_layout, false));
    }
  }

  fn->configure(inputs, axis, output);

  // acl_neon does not revert the disabled dim_correction here because acl_neon's kernels rely on
  // arm_compute::TensorInfo::offset_element_in_bytes(), and reverting would cause an error when a
  // kernel accesses a high dimension whose value is 1.

  _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Pad &node)
{
  const auto input_index{node.getInputs().at(ir::operation::Pad::Input::INPUT)};
  const auto pad_index{node.getInputs().at(ir::operation::Pad::Input::PAD)};
  const auto output_index{node.getOutputs().at(0)};
  assert(_ctx.at(pad_index).data());

  auto rank = _ctx.at(input_index).shape().rank();
  auto pad_base = _ctx.at(pad_index).data()->base();

  auto input = _tensor_builder->at(input_index).get()->handle();
  auto output = _tensor_builder->at(output_index).get()->handle();

  ::arm_compute::PaddingList padding_list;
  padding_list.resize(rank);
  for (int32_t n = 0; n < rank; ++n)
  {
    const int32_t *from = reinterpret_cast<const int32_t *>(pad_base) + (n * 2);

    const auto frontend_layout = _current_op_seq_layout;
    const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
    const auto axis =
        acl_common::ToARMComputeAxis(rank, n, frontend_layout, backend_layout).value();
    padding_list[axis] = ::arm_compute::PaddingInfo{from[0], from[1]};
  }

  const auto input_type = _ctx.at(input_index).typeInfo();
  UNUSED_RELEASE(input_type);
  assert(input->info()->data_type() == acl_common::asDataType(input_type.type()));
  assert(input->info()->quantization_info() ==
         ::arm_compute::QuantizationInfo(input_type.scale(), input_type.offset()));
  const auto pixel_value =
      ::arm_compute::PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
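  // The PixelValue above expresses the padding constant in the input's data type and quantization
  // info, so quantized tensors are padded with the quantized representation of real 0, not raw 0.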
  auto fn = std::make_unique<::arm_compute::NEPadLayer>();
  fn->configure(input, output, padding_list, pixel_value);

  _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Permute &node)
{
  const auto ofm_idx{node.getOutputs().at(0)};
  const auto ifm_idx{node.getInputs().at(0)};
  const auto permute_type = node.getPermuteType();
  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
  auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
  const auto rank = _ctx.at(ofm_idx).shape().rank();
  assert(_ctx.at(ifm_idx).shape().rank() == _ctx.at(ofm_idx).shape().rank());

  std::unique_ptr<::arm_compute::IFunction> fn;

  arm_compute::PermutationVector pv;
  if (permute_type == ir::operation::Permute::Type::NCHW_TO_NHWC && rank == 4)
  {
    pv = arm_compute::PermutationVector{2, 0, 1};

    auto l = std::make_unique<::arm_compute::NEPermute>();

    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);

    fn = std::move(l);
  }
  else if (permute_type == ir::operation::Permute::Type::NHWC_TO_NCHW && rank == 4)
  {
    pv = arm_compute::PermutationVector{1, 2, 0};

    auto l = std::make_unique<::arm_compute::NEPermute>();

    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), pv);

    fn = std::move(l);
  }
  else
  {
    auto l = std::make_unique<::arm_compute::NECopy>();

    l->configure(ifm_tensor->handle(), ofm_tensor->handle());

    fn = std::move(l);
  }

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::PReLU &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::PReLU::Input::INPUT)};
  const auto alpha_index{node.getInputs().at(ir::operation::PReLU::Input::ALPHA)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
  auto alpha_tensor = _tensor_builder->at(alpha_index).get();

  std::unique_ptr<::arm_compute::IFunction> fn;

  auto l = std::make_unique<::arm_compute::NEPReluLayer>();

  l->configure(ifm_tensor->handle(), alpha_tensor->handle(), ofm_tensor->handle());

  fn = std::move(l);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::Reduce &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reduce::Input::INPUT)};
  const auto axes_index{node.getInputs().at(ir::operation::Reduce::Input::AXES)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();

  // Convert to ACL axes taking into account negative values and possible duplicates.
  const auto &axes = _ctx.at(axes_index);
  const auto input_rank = _ctx.at(input_index).shape().rank();
  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = input_tensor->layout();
  const auto reduce_axes =
      acl_common::asCoordinates(axes, input_rank, frontend_layout, backend_layout);
  const auto reduce_type = node.param().reduce_type;
  const auto keep_dims = node.param().keep_dims;

  std::unique_ptr<::arm_compute::IFunction> fn;
  if (reduce_type == ir::operation::Reduce::ReduceType::MEAN)
  {
    auto l = std::make_unique<::arm_compute::NEReduceMean>();

    l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());

    fn = std::move(l);
  }
  else if (reduce_type == ir::operation::Reduce::ReduceType::SUM)
  {
    auto l = std::make_unique<::arm_compute::NEReduceSum>();

    l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle());

    fn = std::move(l);
  }
  else
  {
    auto l = std::make_unique<::arm_compute::NEReduceOperation>();

    l->configure(input_tensor->handle(), reduce_axes, keep_dims, output_tensor->handle(),
                 acl_common::convertReduceType(reduce_type));

    fn = std::move(l);
  }

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::ReLU &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ReLU::Input::INPUT)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();

  auto fn = std::make_unique<arm_compute::NEActivationLayer>();

  const ::arm_compute::ActivationLayerInfo act_info{
      ::arm_compute::ActivationLayerInfo::ActivationFunction::RELU};

  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::ReLU1 &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::ReLU1::Input::INPUT)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  const ::arm_compute::ActivationLayerInfo act_info{
      ::arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 1.0f, -1.0f};
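  // LU_BOUNDED_RELU with a = 1.0f and b = -1.0f clamps the input to [-1, 1]
  // (ACL evaluates min(a, max(b, x))).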
  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();

  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::ReLU6 &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::ReLU6::Input::INPUT)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  const ::arm_compute::ActivationLayerInfo act_info{
      ::arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f};

  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();

  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), act_info);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::Reshape &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Reshape::Input::INPUT)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();

  // NOTE This operation's layout must not be changed from frontend to backend,
  //      so PermutationOperationPass makes the frontend and backend layouts the same.
  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = output_tensor->layout();
  assert((_ctx.at(input_index).shape().rank() < 4 && _ctx.at(output_index).shape().rank() < 4) ||
         frontend_layout == backend_layout);
  UNUSED_RELEASE(frontend_layout);
  UNUSED_RELEASE(backend_layout);

  auto fn = std::make_unique<arm_compute::NEReshapeLayer>();

  fn->configure(input_tensor->handle(), output_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::ResizeBilinear &node)
{
  const auto ofm_index{node.getOutputs().at(0)};

  const auto ifm_index{node.getInputs().at(ir::operation::ResizeBilinear::Input::INPUT)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  auto fn = std::make_unique<::arm_compute::NEScale>();

  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(),
                ::arm_compute::InterpolationPolicy::BILINEAR, ::arm_compute::BorderMode::REPLICATE,
                ::arm_compute::PixelValue(0.f), ::arm_compute::SamplingPolicy::TOP_LEFT);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::RNN &node)
{
  const auto output_index{node.getOutputs().at(ir::operation::RNN::Output::OUTPUT)};
  const auto hidden_state_out_index{
      node.getOutputs().at(ir::operation::RNN::Output::HIDDEN_STATE_OUT)};

  const auto input_index{node.getInputs().at(ir::operation::RNN::Input::INPUT)};
  const auto weights_index{node.getInputs().at(ir::operation::RNN::Input::WEIGHTS)};
  const auto recurrent_weights_index{
      node.getInputs().at(ir::operation::RNN::Input::RECURRENT_WEIGHTS)};
  const auto bias_index{node.getInputs().at(ir::operation::RNN::Input::BIAS)};
  const auto hidden_state_in_index{node.getInputs().at(ir::operation::RNN::Input::HIDDEN_STATE_IN)};

  const auto activation = node.param().activation;

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto hidden_state_out_tensor = _tensor_builder->at(hidden_state_out_index).get();

  auto input_tensor = _tensor_builder->at(input_index).get();
  auto weights_tensor = _tensor_builder->at(weights_index).get();
  auto recurrent_weights_tensor = _tensor_builder->at(recurrent_weights_index).get();
  auto bias_tensor = _tensor_builder->at(bias_index).get();
  auto hidden_state_in_tensor = _tensor_builder->at(hidden_state_in_index).get();
  auto act_info = ::onert::backend::acl_common::asActivationLayerInfo(activation);

  auto copy_layer = std::make_unique<::arm_compute::NECopy>();
  copy_layer->configure(hidden_state_in_tensor->handle(), hidden_state_out_tensor->handle());
  _return_fn = asAclFunction(std::move(copy_layer));

  auto fn = std::make_unique<::arm_compute::NERNNLayer>(
      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
  fn->configure(input_tensor->handle(), weights_tensor->handle(),
                recurrent_weights_tensor->handle(), bias_tensor->handle(),
                hidden_state_out_tensor->handle(), output_tensor->handle(), act_info);
  _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::RSQRT &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::RSQRT::Input::INPUT)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  auto fn = std::make_unique<::arm_compute::NERsqrtLayer>();

  fn->configure(ifm_tensor->handle(), ofm_tensor->handle());

  _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::Squeeze &node)
{
  // Squeeze is identical to reshape except that it has an optional dimensions input.
  // In addition, the optional dims input is ignored since the output tensor already has the
  // squeezed shape produced by freezer and toco.
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Squeeze::Input::INPUT)};
  const auto dims{node.param().dims};
  const auto ndim{node.param().ndim};
  (void)dims;
  (void)ndim;

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();
  auto fn = std::make_unique<arm_compute::NEReshapeLayer>();
  fn->configure(input_tensor->handle(), output_tensor->handle());
  auto acl_fn = asAclFunction(std::move(fn));
  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::Tanh &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Tanh::Input::INPUT)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();

  auto fn = std::make_unique<arm_compute::NEActivationLayer>();

  const ::arm_compute::ActivationLayerInfo act_info{
      ::arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
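  // TANH with a = b = 1.0f computes plain tanh(x) (ACL evaluates a * tanh(b * x)).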
  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::Softmax &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Softmax::Input::INPUT)};
  const auto beta = node.param().beta;
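  // beta scales the logits before exponentiation:
  // softmax(x)_i = exp(beta * x_i) / sum_j(exp(beta * x_j))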
  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();
  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = input_tensor->layout();

  // Disable applied dim_correction
  const size_t input_rank = _ctx.at(input_index).shape().rank();
  if (input_rank != input_tensor->info()->num_dimensions())
  {
    // This means that a high dimension's value is 1 and dim_correction is applied to the input
    // tensor
    const auto input = _ctx.at(input_index);
    input_tensor->info()->set_tensor_shape(
        acl_common::asTensorShape(input.shape(), frontend_layout, backend_layout, false));
  }

  auto fn = std::make_unique<::arm_compute::NESoftmaxLayer>(
      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());

  fn->configure(input_tensor->handle(), output_tensor->handle(), beta);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::SpaceToBatchND &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::INPUT)};
  const auto block_size_index{
      node.getInputs().at(ir::operation::SpaceToBatchND::Input::BLOCK_SIZE)};
  const auto paddings_index{node.getInputs().at(ir::operation::SpaceToBatchND::Input::PADDINGS)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
  auto block_size_tensor = _tensor_builder->at(block_size_index).get();
  auto paddings_tensor = _tensor_builder->at(paddings_index).get();

  assert(_ctx.at(block_size_index).data());
  assert(_ctx.at(paddings_index).data());

  auto fn = std::make_unique<::arm_compute::NESpaceToBatchLayer>();

  fn->configure(ifm_tensor->handle(), block_size_tensor->handle(), paddings_tensor->handle(),
                ofm_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::SpaceToDepth &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ifm_index{node.getInputs().at(ir::operation::SpaceToDepth::Input::INPUT)};

  auto block_size = node.param().block_size;

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();

  auto fn = std::make_unique<::arm_compute::NESpaceToDepthLayer>();

  fn->configure(ifm_tensor->handle(), ofm_tensor->handle(), block_size);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::Split &node)
{
  // TODO Support this op by SubTensor
  const auto ifm_index{node.getInputs().at(ir::operation::Split::Input::INPUT)};

  assert(node.param().num_splits == static_cast<int>(node.getOutputs().size()));

  const auto ifm_rank = _ctx.at(ifm_index).shape().rank();
  std::vector<ir::OperandIndex> output_indexes;
  for (const auto &output : node.getOutputs())
    output_indexes.emplace_back(output);

  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
  std::vector<arm_compute::ITensor *> output_tensors;
  for (const auto &ofm_ind : output_indexes)
    output_tensors.emplace_back(_tensor_builder->at(ofm_ind).get()->handle());

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = ifm_tensor->layout();
  auto axis = node.param().axis;
  if (axis < 0)
    axis += ifm_rank;
  axis = acl_common::ToARMComputeAxis(ifm_rank, axis, frontend_layout, backend_layout).value();

  auto fn = std::make_unique<::arm_compute::NESplit>();

  fn->configure(ifm_tensor->handle(), output_tensors, axis);

  _return_fn = asAclFunction(std::move(fn));
}
void KernelGenerator::visit(const ir::operation::SQRT &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::SQRT::Input::INPUT)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();

  const ::arm_compute::ActivationLayerInfo act_info{
      ::arm_compute::ActivationLayerInfo::ActivationFunction::SQRT};

  auto fn = std::make_unique<::arm_compute::NEActivationLayer>();

  fn->configure(input_tensor->handle(), output_tensor->handle(), act_info);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::SquaredDifference &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::SquaredDifference::Input::RHS)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
  auto rhs_tensor = _tensor_builder->at(rhs_index).get();

  auto fn = std::make_unique<::arm_compute::NEElementwiseSquaredDiff>();

  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::Sub &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Sub::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Sub::Input::RHS)};

  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
  auto rhs_tensor = _tensor_builder->at(rhs_index).get();

  auto fn = std::make_unique<::arm_compute::NEArithmeticSubtraction>();

  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
                arm_compute::ConvertPolicy::SATURATE);

  _return_fn = std::make_unique<exec::FunctionSequence>(
      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
void KernelGenerator::visit(const ir::operation::Slice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Slice::Input::INPUT)};
  const auto begins_index{node.getInputs().at(ir::operation::Slice::Input::BEGINS)};
  const auto sizes_index{node.getInputs().at(ir::operation::Slice::Input::SIZES)};

  auto outputData_tensor = _tensor_builder->at(output_index).get();
  auto inputData_tensor = _tensor_builder->at(input_index).get();
  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = inputData_tensor->layout();

  // Set initializers for indices data such as order of inputData
  int input_rank = _ctx.at(input_index).shape().rank();
  std::vector<int32_t> starts;
  std::vector<int32_t> ends;
  starts.resize(input_rank, 0);
  ends.resize(input_rank, 0);

  auto beginData_base = _ctx.at(begins_index).data()->base();
  auto sizeData_base = _ctx.at(sizes_index).data()->base();
  const int beginData_size = _ctx.at(begins_index).shape().num_elements();
  const int sizeData_size = _ctx.at(sizes_index).shape().num_elements();

  using ir::DataType;

  UNUSED_RELEASE(beginData_size);
  UNUSED_RELEASE(sizeData_size);

  assert(_ctx.at(begins_index).typeInfo().type() == DataType::INT32);
  assert(_ctx.at(sizes_index).typeInfo().type() == DataType::INT32);
  assert(beginData_size == input_rank);
  assert(sizeData_size == input_rank);

  assert(beginData_base != nullptr);
  for (int n = 0; n < input_rank; ++n)
  {
    auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
                                                               backend_layout)
                    .value();

    int32_t begin_value = *(reinterpret_cast<const int32_t *>(beginData_base) + n);
    starts[axis] = begin_value;

    int32_t size_value = *(reinterpret_cast<const int32_t *>(sizeData_base) + n);
    ends[axis] = begin_value + size_value;
  }

  ::arm_compute::Coordinates starts_set;
  ::arm_compute::Coordinates ends_set;

  for (size_t i = 0; i < starts.size(); ++i)
  {
    starts_set.set(i, starts[i]);
    ends_set.set(i, ends[i]);
  }

  auto fn = std::make_unique<::arm_compute::NESlice>();

  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::StridedSlice &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::StridedSlice::Input::INPUT)};
  const auto starts_index{node.getInputs().at(ir::operation::StridedSlice::Input::STARTS)};
  const auto ends_index{node.getInputs().at(ir::operation::StridedSlice::Input::ENDS)};
  const auto strides_index{node.getInputs().at(ir::operation::StridedSlice::Input::STRIDES)};

  auto outputData_tensor = _tensor_builder->at(output_index).get();
  auto inputData_tensor = _tensor_builder->at(input_index).get();
  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = inputData_tensor->layout();

  // Set initializers for indices data such as order of inputData
  int input_rank = _ctx.at(input_index).shape().rank();
  std::vector<int32_t> starts;
  std::vector<int32_t> ends;
  std::vector<int32_t> strides;
  starts.resize(input_rank, 0);
  ends.resize(input_rank, 0);
  strides.resize(input_rank, 0);

  auto startData_base = _ctx.at(starts_index).data()->base();
  auto endData_base = _ctx.at(ends_index).data()->base();
  auto stridesData_base = _ctx.at(strides_index).data()->base();
  const int startData_size = _ctx.at(starts_index).shape().num_elements();
  const int endData_size = _ctx.at(ends_index).shape().num_elements();
  const int stridesData_size = _ctx.at(strides_index).shape().num_elements();

  using ir::DataType;

  UNUSED_RELEASE(startData_size);
  UNUSED_RELEASE(endData_size);
  UNUSED_RELEASE(stridesData_size);

  assert(_ctx.at(starts_index).typeInfo().type() == DataType::INT32);
  assert(_ctx.at(ends_index).typeInfo().type() == DataType::INT32);
  assert(_ctx.at(strides_index).typeInfo().type() == DataType::INT32);
  assert(startData_size == input_rank);
  assert(endData_size == input_rank);
  assert(stridesData_size == input_rank);

  assert(startData_base != nullptr);
  for (int n = 0; n < input_rank; ++n)
  {
    auto axis = ::onert::backend::acl_common::ToARMComputeAxis(input_rank, n, frontend_layout,
                                                               backend_layout)
                    .value();

    int32_t start_value = *(reinterpret_cast<const int32_t *>(startData_base) + n);
    starts[axis] = start_value;

    int32_t end_value = *(reinterpret_cast<const int32_t *>(endData_base) + n);
    ends[axis] = end_value;

    int32_t strides_value = *(reinterpret_cast<const int32_t *>(stridesData_base) + n);
    strides[axis] = strides_value;
  }

  // Set mask bits such as order of inputData
  // FIXME Take the layouts into account.
  const auto begin_mask = acl_common::ReorderBits<int32_t>(node.param().begin_mask, input_rank);
  const auto end_mask = acl_common::ReorderBits<int32_t>(node.param().end_mask, input_rank);
  const auto shrink_axis_mask =
      acl_common::ReorderBits<int32_t>(node.param().shrink_axis_mask, input_rank);
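  // ReorderBits mirrors the low input_rank bits of each mask (frontend axis i -> bit
  // input_rank - 1 - i) so the masks line up with the reversed ACL axis order used for
  // starts/ends/strides above.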
  ::arm_compute::Coordinates starts_set;
  ::arm_compute::Coordinates ends_set;
  ::arm_compute::BiStrides strides_set;

  for (size_t i = 0; i < starts.size(); ++i)
  {
    starts_set.set(i, starts[i]);
    ends_set.set(i, ends[i]);
    strides_set.set(i, strides[i]);
  }

  auto fn = std::make_unique<::arm_compute::NEStridedSlice>();

  fn->configure(inputData_tensor->handle(), outputData_tensor->handle(), starts_set, ends_set,
                strides_set, begin_mask, end_mask, shrink_axis_mask);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
void KernelGenerator::visit(const ir::operation::TransposeConv &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto ker_index{node.getInputs().at(ir::operation::TransposeConv::Input::KERNEL)};
  const auto ifm_index{node.getInputs().at(ir::operation::TransposeConv::Input::INPUT)};

  const auto ofm_shape = _ctx.at(ofm_index).shape().asFeature(_current_op_seq_layout);
  const auto ifm_shape = _ctx.at(ifm_index).shape().asFeature(_current_op_seq_layout);
  const auto ker_shape = _ctx.at(ker_index).shape().asFeature(_current_op_seq_layout);

  const auto stride = node.param().stride;

  assert((node.param().padding.type == ir::PaddingType::SAME) ||
         (node.param().padding.type == ir::PaddingType::VALID));
  auto padding = ir::calculatePadding(node.param().padding, ofm_shape, ifm_shape, stride,
                                      ker_shape.W, ker_shape.H);

  uint32_t invalid_horizontal = 0;
  uint32_t invalid_vertical = 0;
  if (node.param().padding.type == ir::PaddingType::VALID)
  {
    invalid_horizontal =
        ofm_shape.W - (1 + (ifm_shape.W - 1) * stride.horizontal) - (ker_shape.W - 1);
    invalid_vertical = ofm_shape.H - (1 + (ifm_shape.H - 1) * stride.vertical) - (ker_shape.H - 1);
  }
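  // For VALID padding a transposed convolution yields 1 + (ifm - 1) * stride + (ker - 1) elements
  // per spatial dimension; anything beyond that in the declared output shape is computed above as
  // an invalid right/bottom border for ACL to skip.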

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto ifm_tensor = _tensor_builder->at(ifm_index).get();
  auto ker_tensor = _tensor_builder->at(ker_index).get();

  const auto tconv_info = acl_common::asPadStrideInfo(padding, stride);

  auto fn = std::make_unique<::arm_compute::NETransposeConvLayer>();

  fn->configure(ifm_tensor->handle(), ker_tensor->handle(), nullptr, ofm_tensor->handle(),
                tconv_info, invalid_horizontal, invalid_vertical);

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
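
// Transpose: rank <= 2 tensors can use the plain 2-D NETranspose; anything of
// higher rank goes through NEPermute with a backend-layout permutation vector.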
void KernelGenerator::visit(const ir::operation::Transpose &node)
{
  const auto ofm_idx{node.getOutputs().at(0)};
  const auto ifm_idx{node.getInputs().at(ir::operation::Transpose::Input::INPUT)};
  const auto &perm{node.param().perm};

  auto ofm_tensor = _tensor_builder->at(ofm_idx).get();
  const auto ifm_tensor = _tensor_builder->at(ifm_idx).get();
  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = ifm_tensor->layout();

  const auto rank = _ctx.at(ifm_idx).shape().rank();
  std::vector<std::int32_t> pv(perm.cbegin(), perm.cend());
  auto backend_pv = ::onert::backend::acl_common::getARMComputePermutationVector(
      rank, pv, frontend_layout, backend_layout);

  std::unique_ptr<::arm_compute::IFunction> fn;

  if (ifm_tensor->num_dimensions() <= 2 && ofm_tensor->num_dimensions() <= 2)
  {
    auto l = std::make_unique<::arm_compute::NETranspose>();

    l->configure(ifm_tensor->handle(), ofm_tensor->handle());

    fn = std::move(l);
  }
  else
  {
    auto l = std::make_unique<::arm_compute::NEPermute>();

    l->configure(ifm_tensor->handle(), ofm_tensor->handle(), backend_pv);

    fn = std::move(l);
  }

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
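
// Unpack (Unstack): splits the input into one output tensor per slice along
// `axis`; a negative axis is normalized before conversion to the ACL axis order.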
void KernelGenerator::visit(const ir::operation::Unpack &node)
{
  const auto input_index{node.getInputs().at(ir::operation::Unpack::Input::INPUT)};
  auto axis{node.param().axis};

  const auto input_rank = _ctx.at(input_index).shape().rank();

  std::vector<ir::OperandIndex> output_indexes;
  for (const auto &output_index : node.getOutputs())
    output_indexes.emplace_back(output_index);

  auto input = _tensor_builder->at(input_index).get()->handle();
  std::vector<arm_compute::ITensor *> outputs;
  for (const auto &output_index : output_indexes)
    outputs.emplace_back(_tensor_builder->at(output_index)->handle());

  const auto frontend_layout = _current_op_seq_layout;
  const auto backend_layout = _tensor_builder->at(input_index).get()->layout();
  if (axis < 0)
    axis += input_rank;
  axis = acl_common::ToARMComputeAxis(input_rank, axis, frontend_layout, backend_layout).value();

  auto fn = std::make_unique<::arm_compute::NEUnstack>();

  // Disable the dim_correction that ACL applied to the output tensor infos
  std::vector<arm_compute::TensorShape> orig_outputs_acl_tensor_shapes;
  for (const auto &output_index : output_indexes)
  {
    size_t output_rank = _ctx.at(output_index).shape().rank();
    const auto &output_tensor = _tensor_builder->at(output_index);
    orig_outputs_acl_tensor_shapes.emplace_back(output_tensor->info()->tensor_shape());
    assert(output_rank == output_tensor->num_dimensions());
    if (output_rank != output_tensor->info()->num_dimensions())
    {
      // Trailing dimensions of value 1 were trimmed by dim_correction;
      // restore the full-rank tensor shape
      output_tensor->info()->set_tensor_shape(acl_common::asTensorShape(
          _ctx.at(output_index).shape(), _current_op_seq_layout, backend_layout, false));
    }
  }

  fn->configure(input, outputs, axis);

  _return_fn = asAclFunction(std::move(fn));
}
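
// Add: NEArithmeticAddition is configured without an activation here, so the
// fused activation from the IR is appended as a separate kernel via
// ActivationBuilder.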
void KernelGenerator::visit(const ir::operation::Add &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Add::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Add::Input::RHS)};

  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
  auto rhs_tensor = _tensor_builder->at(rhs_index).get();

  auto fn = std::make_unique<::arm_compute::NEArithmeticAddition>();

  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle(),
                arm_compute::ConvertPolicy::SATURATE);

  _return_fn = std::make_unique<exec::FunctionSequence>(
      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
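
// Div: element-wise division; as with Add, the fused activation runs as a
// separate kernel after the division.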
void KernelGenerator::visit(const ir::operation::Div &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Div::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Div::Input::RHS)};

  const auto activation = node.param().activation;

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
  auto rhs_tensor = _tensor_builder->at(rhs_index).get();

  auto fn = std::make_unique<::arm_compute::NEElementwiseDivision>();

  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());

  _return_fn = std::make_unique<exec::FunctionSequence>(
      asAclFunction(std::move(fn)), ActivationBuilder::generate(activation, ofm_tensor->handle()));
}
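
// Exp: element-wise natural exponential.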
void KernelGenerator::visit(const ir::operation::Exp &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::Exp::Input::INPUT)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();

  auto fn = std::make_unique<::arm_compute::NEExpLayer>();

  fn->configure(input_tensor->handle(), output_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
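
// ExpandDims inserts a size-1 dimension without moving any data, so it is
// lowered to a plain reshape.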
void KernelGenerator::visit(const ir::operation::ExpandDims &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input_index{node.getInputs().at(ir::operation::ExpandDims::Input::INPUT)};

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input_tensor = _tensor_builder->at(input_index).get();

  auto fn = std::make_unique<::arm_compute::NEReshapeLayer>();

  fn->configure(input_tensor->handle(), output_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
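
// Comparison: relies on the IR comparison type sharing its enumerator values
// with arm_compute::ComparisonOperation, hence the direct cast below.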
void KernelGenerator::visit(const ir::operation::Comparison &node)
{
  const auto output_index{node.getOutputs().at(0)};
  const auto input0_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT0)};
  const auto input1_index{node.getInputs().at(ir::operation::Comparison::Input::INPUT1)};

  const auto comparison_type = node.param().comparison_type;

  auto output_tensor = _tensor_builder->at(output_index).get();
  auto input0_tensor = _tensor_builder->at(input0_index).get();
  auto input1_tensor = _tensor_builder->at(input1_index).get();

  auto fn = std::make_unique<::arm_compute::NEElementwiseComparison>();

  fn->configure(input0_tensor->handle(), input1_tensor->handle(), output_tensor->handle(),
                static_cast<arm_compute::ComparisonOperation>(comparison_type));

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
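
// Min: element-wise minimum of the two inputs.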
void KernelGenerator::visit(const ir::operation::Min &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Min::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Min::Input::RHS)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
  auto rhs_tensor = _tensor_builder->at(rhs_index).get();

  auto fn = std::make_unique<::arm_compute::NEElementwiseMin>();

  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
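
// Max: element-wise maximum of the two inputs.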
void KernelGenerator::visit(const ir::operation::Max &node)
{
  const auto ofm_index{node.getOutputs().at(0)};
  const auto lhs_index{node.getInputs().at(ir::operation::Max::Input::LHS)};
  const auto rhs_index{node.getInputs().at(ir::operation::Max::Input::RHS)};

  auto ofm_tensor = _tensor_builder->at(ofm_index).get();
  auto lhs_tensor = _tensor_builder->at(lhs_index).get();
  auto rhs_tensor = _tensor_builder->at(rhs_index).get();

  auto fn = std::make_unique<::arm_compute::NEElementwiseMax>();

  fn->configure(lhs_tensor->handle(), rhs_tensor->handle(), ofm_tensor->handle());

  auto acl_fn = asAclFunction(std::move(fn));

  _return_fn = std::move(acl_fn);
}
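
// OneHot: sets on_value at the positions given by `indices` (expanded along
// `axis`) and off_value elsewhere; delegated to the CPP fallback kernel
// CPPOneHotEx rather than a NEON function.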
void KernelGenerator::visit(const ir::operation::OneHot &node)
{
  const auto out_idx{node.getOutputs().at(0)};
  const auto indices_idx{node.getInputs().at(ir::operation::OneHot::Input::INDICES)};
  const auto depth_idx{node.getInputs().at(ir::operation::OneHot::Input::DEPTH)};
  const auto onvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::ON_VALUE)};
  const auto offvalue_idx{node.getInputs().at(ir::operation::OneHot::Input::OFF_VALUE)};
  const auto axis = node.param().axis;

  auto output_tensor = _tensor_builder->at(out_idx).get();
  auto indices_tensor = _tensor_builder->at(indices_idx).get();
  auto depth_tensor = _tensor_builder->at(depth_idx).get();
  auto onvalue_tensor = _tensor_builder->at(onvalue_idx).get();
  auto offvalue_tensor = _tensor_builder->at(offvalue_idx).get();

  auto fn = std::make_unique<::arm_compute::CPPOneHotEx>();
  fn->configure(indices_tensor->handle(), depth_tensor->handle(), onvalue_tensor->handle(),
                offvalue_tensor->handle(), output_tensor->handle(), axis);
  auto acl_fn = asAclFunction(std::move(fn));
  _return_fn = std::move(acl_fn);
}

} // namespace acl_neon
} // namespace backend
} // namespace onert